{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "31f3e96f",
   "metadata": {},
   "source": [
    "# Use AlphaZero to Play Tic-Tac-Toe\n",
    "\n",
    "PyTorch version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ac9ccbcb",
   "metadata": {},
   "outputs": [],
   "source": [
    "import collections\n",
    "import math\n",
    "import logging\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import torch\n",
    "torch.manual_seed(0)\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.nn.functional as functional\n",
    "\n",
    "import boardgame2\n",
    "from boardgame2 import BLACK, WHITE\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e901bad",
   "metadata": {},
   "source": [
    "Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9eaa1107",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:10:57 [INFO] allow_pass: True\n",
      "00:10:57 [INFO] illegal_equivalent_action: [-1  0]\n",
      "00:10:57 [INFO] render_characters: {0: '+', 1: 'o', -1: 'x'}\n",
      "00:10:57 [INFO] board: [[0. 0. 0.]\n",
      " [0. 0. 0.]\n",
      " [0. 0. 0.]]\n",
      "00:10:57 [INFO] observation_space: Tuple(Box(-1, 1, (3, 3), int8), Box(-1, 1, (), int8))\n",
      "00:10:57 [INFO] action_space: Box(-1, 2, (2,), int8)\n",
      "00:10:57 [INFO] target_length: 3\n",
      "00:10:57 [INFO] spec: EnvSpec(TicTacToe-v0)\n",
      "00:10:57 [INFO] id: TicTacToe-v0\n",
      "00:10:57 [INFO] entry_point: boardgame2:KInARowEnv\n",
      "00:10:57 [INFO] reward_threshold: None\n",
      "00:10:57 [INFO] nondeterministic: False\n",
      "00:10:57 [INFO] max_episode_steps: None\n",
      "00:10:57 [INFO] _kwargs: {'board_shape': 3, 'target_length': 3}\n",
      "00:10:57 [INFO] _env_name: TicTacToe\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('TicTacToe-v0')\n",
    "env.seed(0)\n",
    "for key in vars(env):\n",
    "    logging.info('%s: %s', key, vars(env)[key])\n",
    "for key in vars(env.spec):\n",
    "    logging.info('%s: %s', key, vars(env.spec)[key])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7eb6e8d",
   "metadata": {},
   "source": [
    "Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "18e57c1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "class AlphaZeroReplayer:\n",
    "    def __init__(self):\n",
    "        self.fields = ['player', 'board', 'prob', 'winner']\n",
    "        self.memory = pd.DataFrame(columns=self.fields)\n",
    "\n",
    "    def store(self, df):\n",
    "        self.memory = pd.concat([self.memory, df[self.fields]], ignore_index=True)\n",
    "\n",
    "    def sample(self, size):\n",
    "        indices = np.random.choice(self.memory.shape[0], size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.fields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ac089153",
   "metadata": {},
   "outputs": [],
   "source": [
    "class AlphaZeroNet(nn.Module):\n",
    "    def __init__(self, input_shape):\n",
    "        super().__init__()\n",
    "\n",
    "        self.input_shape = input_shape\n",
    "\n",
    "        # common net\n",
    "        self.input_net = nn.Sequential(\n",
    "                nn.Conv2d(1, 256, kernel_size=3, padding=\"same\"),\n",
    "                nn.BatchNorm2d(256), nn.ReLU())\n",
    "        self.residual_nets = [nn.Sequential(\n",
    "                nn.Conv2d(256, 256, kernel_size=3, padding=\"same\"),\n",
    "                nn.BatchNorm2d(256)) for _ in range(2)]\n",
    "\n",
    "        # probability net\n",
    "        self.prob_net = nn.Sequential(\n",
    "                nn.Conv2d(256, 256, kernel_size=3, padding=\"same\"),\n",
    "                nn.BatchNorm2d(256), nn.ReLU(),\n",
    "                nn.Conv2d(256, 1, kernel_size=3, padding=\"same\"))\n",
    "\n",
    "        # value net\n",
    "        self.value_net0 = nn.Sequential(\n",
    "                nn.Conv2d(256, 1, kernel_size=3, padding=\"same\"),\n",
    "                nn.BatchNorm2d(1), nn.ReLU())\n",
    "        self.value_net1 = nn.Sequential(\n",
    "                nn.Linear(np.prod(input_shape), 1), nn.Tanh())\n",
    "\n",
    "\n",
    "    def forward(self, board_tensor):\n",
    "        # common net\n",
    "        input_tensor = board_tensor.view(-1, 1, *self.input_shape)\n",
    "        x = self.input_net(input_tensor)\n",
    "        for i_net, residual_net in enumerate(self.residual_nets):\n",
    "            y = residual_net(x)\n",
    "            if i_net == len(self.residual_nets) - 1:\n",
    "                y = y + x\n",
    "            x = torch.clamp(y, 0)\n",
    "        common_feature_tensor = x\n",
    "\n",
    "        # probability net\n",
    "        logit_tensor = self.prob_net(common_feature_tensor)\n",
    "        logit_flatten_tensor = logit_tensor.view(-1)\n",
    "        prob_flatten_tensor = functional.softmax(logit_flatten_tensor, dim=-1)\n",
    "        prob_tensor = prob_flatten_tensor.view(-1, *self.input_shape)\n",
    "\n",
    "        # value net\n",
    "        v_feature_tensor = self.value_net0(common_feature_tensor)\n",
    "        v_flatten_tensor = v_feature_tensor.view(-1, np.prod(self.input_shape))\n",
    "        v_tensor = self.value_net1(v_flatten_tensor)\n",
    "\n",
    "        return prob_tensor, v_tensor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "1a8e2759",
   "metadata": {},
   "outputs": [],
   "source": [
    "class AlphaZeroAgent:\n",
    "    def __init__(self, env):\n",
    "        self.env = env\n",
    "        self.board = np.zeros_like(env.board)\n",
    "        self.reset_mcts()\n",
    "\n",
    "        self.replayer = AlphaZeroReplayer()\n",
    "\n",
    "        self.net = AlphaZeroNet(input_shape=self.board.shape)\n",
    "        self.prob_loss = nn.BCELoss()\n",
    "        self.v_loss = nn.MSELoss()\n",
    "        self.optimizer = optim.Adam(self.net.parameters(), 1e-3,\n",
    "                weight_decay=1e-4)\n",
    "\n",
    "\n",
    "    def reset_mcts(self):\n",
    "        def zero_board_factory(): # for construct default_dict\n",
    "            return np.zeros_like(self.board, dtype=float)\n",
    "        self.q = collections.defaultdict(zero_board_factory)\n",
    "            # q estimates: board -> board\n",
    "        self.count = collections.defaultdict(zero_board_factory)\n",
    "            # q count visitation: board -> board\n",
    "        self.policy = {} # policy: board -> board\n",
    "        self.valid = {} # valid position: board -> board\n",
    "        self.winner = {} # winner: board -> None or int\n",
    "\n",
    "    def reset(self, mode):\n",
    "        self.mode = mode\n",
    "        if mode == \"train\":\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, winner, _):\n",
    "        board, player = observation\n",
    "        canonical_board = player * board\n",
    "        s = boardgame2.strfboard(canonical_board)\n",
    "        while self.count[s].sum() < 200: # conduct MCTS 200 times\n",
    "            self.search(canonical_board, prior_noise=True)\n",
    "        prob = self.count[s] / self.count[s].sum()\n",
    "\n",
    "        # sample\n",
    "        location_index = np.random.choice(prob.size, p=prob.reshape(-1))\n",
    "        action = np.unravel_index(location_index, prob.shape)\n",
    "\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory += [player, board, prob, winner]\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        if self.mode == 'train':\n",
    "            self.save_trajectory_to_replayer()\n",
    "            if len(self.replayer.memory) >= 1000:\n",
    "                for batch in range(2): # learn multiple times\n",
    "                    self.learn()\n",
    "                self.replayer = AlphaZeroReplayer() # reset replayer after the agent changes itself\n",
    "                self.reset_mcts()\n",
    "\n",
    "    def save_trajectory_to_replayer(self):\n",
    "        df = pd.DataFrame(\n",
    "                np.array(self.trajectory, dtype=object).reshape(-1, 4),\n",
    "                columns=['player', 'board', 'prob', 'winner'], dtype=object)\n",
    "        winner = self.trajectory[-1]\n",
    "        df['winner'] = winner\n",
    "        self.replayer.store(df)\n",
    "\n",
    "    def search(self, board, prior_noise=False): # MCTS\n",
    "        s = boardgame2.strfboard(board)\n",
    "\n",
    "        if s not in self.winner:\n",
    "            self.winner[s] = self.env.get_winner((board, BLACK))\n",
    "        if self.winner[s] is not None: # if there is a winner\n",
    "            return self.winner[s]\n",
    "\n",
    "        if s not in self.policy: # leaf that has not calculate the policy\n",
    "            board_tensor = torch.as_tensor(board, dtype=torch.float).view(1, 1,\n",
    "                    *self.board.shape)\n",
    "            pi_tensor, v_tensor = self.net(board_tensor)\n",
    "            pi = pi_tensor.detach().numpy()[0]\n",
    "            v = v_tensor.detach().numpy()[0]\n",
    "            valid = self.env.get_valid((board, BLACK))\n",
    "            masked_pi = pi * valid\n",
    "            total_masked_pi = np.sum(masked_pi)\n",
    "            if total_masked_pi <= 0:\n",
    "                # all valid actions do not have probabilities. rarely occur\n",
    "                masked_pi = valid # workaround\n",
    "                total_masked_pi = np.sum(masked_pi)\n",
    "            self.policy[s] = masked_pi / total_masked_pi\n",
    "            self.valid[s] = valid\n",
    "            return v\n",
    "\n",
    "        # calculate PUCT\n",
    "        count_sum = self.count[s].sum()\n",
    "        c_init = 1.25\n",
    "        c_base = 19652.\n",
    "        coef = (c_init + np.log1p((1 + count_sum) / c_base)) * \\\n",
    "                math.sqrt(count_sum) / (1. + self.count[s])\n",
    "        if prior_noise:\n",
    "            alpha = 1. / self.valid[s].sum()\n",
    "            noise = np.random.gamma(alpha, 1., board.shape)\n",
    "            noise *= self.valid[s]\n",
    "            noise /= noise.sum()\n",
    "            prior_exploration_fraction=0.25\n",
    "            prior = (1. - prior_exploration_fraction) * self.policy[s] \\\n",
    "                    + prior_exploration_fraction * noise\n",
    "        else:\n",
    "            prior = self.policy[s]\n",
    "        ub = np.where(self.valid[s], self.q[s] + coef * prior, np.nan)\n",
    "        location_index = np.nanargmax(ub)\n",
    "        location = np.unravel_index(location_index, board.shape)\n",
    "\n",
    "        (next_board, next_player), _, _, _ = self.env.next_step(\n",
    "                (board, BLACK), np.array(location))\n",
    "        next_canonical_board = next_player * next_board\n",
    "        next_v = self.search(next_canonical_board) # recursive\n",
    "        v = next_player * next_v\n",
    "\n",
    "        self.count[s][location] += 1\n",
    "        self.q[s][location] += (v - self.q[s][location]) / \\\n",
    "                self.count[s][location]\n",
    "        return v\n",
    "\n",
    "    def learn(self):\n",
    "        players, boards, probs, winners = self.replayer.sample(64)\n",
    "        canonical_boards = players[:, np.newaxis, np.newaxis] * boards\n",
    "        targets = (players * winners)[:, np.newaxis]\n",
    "\n",
    "        target_prob_tensor = torch.as_tensor(probs, dtype=torch.float)\n",
    "        canonical_board_tensor = torch.as_tensor(canonical_boards, dtype=torch.float)\n",
    "        target_tensor = torch.as_tensor(targets, dtype=torch.float)\n",
    "\n",
    "        prob_tensor, v_tensor = self.net(canonical_board_tensor)\n",
    "\n",
    "        flatten_target_prob_tensor = target_prob_tensor.view(-1, self.board.size)\n",
    "        flatten_prob_tensor = prob_tensor.view(-1, self.board.size)\n",
    "        prob_loss_tensor = self.prob_loss(flatten_prob_tensor,\n",
    "                flatten_target_prob_tensor)\n",
    "        v_loss_tensor = self.v_loss(v_tensor, target_tensor)\n",
    "        loss_tensor = prob_loss_tensor + v_loss_tensor\n",
    "        self.optimizer.zero_grad()\n",
    "        loss_tensor.backward()\n",
    "        self.optimizer.step()\n",
    "\n",
    "\n",
    "agent = AlphaZeroAgent(env=env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d3486aa0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:11:21 [INFO] train episode 0: winner = -1, steps = 5\n",
      "00:11:38 [INFO] train episode 1: winner = 1, steps = 6\n",
      "00:11:50 [INFO] train episode 2: winner = 1, steps = 6\n",
      "00:11:53 [INFO] train episode 3: winner = 1, steps = 6\n",
      "00:12:12 [INFO] train episode 4: winner = 0, steps = 8\n",
      "00:12:15 [INFO] train episode 5: winner = 1, steps = 4\n",
      "00:12:35 [INFO] train episode 6: winner = 0, steps = 8\n",
      "00:12:47 [INFO] train episode 7: winner = 1, steps = 6\n",
      "00:12:56 [INFO] train episode 8: winner = 1, steps = 6\n",
      "00:13:07 [INFO] train episode 9: winner = 1, steps = 6\n",
      "00:13:14 [INFO] train episode 10: winner = 1, steps = 4\n",
      "00:13:22 [INFO] train episode 11: winner = 1, steps = 4\n",
      "00:13:23 [INFO] train episode 12: winner = 0, steps = 8\n",
      "00:13:27 [INFO] train episode 13: winner = 0, steps = 8\n",
      "00:13:38 [INFO] train episode 14: winner = 0, steps = 8\n",
      "00:13:41 [INFO] train episode 15: winner = 1, steps = 6\n",
      "00:13:44 [INFO] train episode 16: winner = 1, steps = 6\n",
      "00:13:49 [INFO] train episode 17: winner = -1, steps = 5\n",
      "00:13:52 [INFO] train episode 18: winner = 1, steps = 6\n",
      "00:13:56 [INFO] train episode 19: winner = 0, steps = 8\n",
      "00:14:00 [INFO] train episode 20: winner = 1, steps = 6\n",
      "00:14:04 [INFO] train episode 21: winner = -1, steps = 5\n",
      "00:14:09 [INFO] train episode 22: winner = 0, steps = 8\n",
      "00:14:11 [INFO] train episode 23: winner = 1, steps = 4\n",
      "00:14:15 [INFO] train episode 24: winner = 0, steps = 8\n",
      "00:14:15 [INFO] train episode 25: winner = 1, steps = 4\n",
      "00:14:20 [INFO] train episode 26: winner = -1, steps = 7\n",
      "00:14:20 [INFO] train episode 27: winner = 1, steps = 4\n",
      "00:14:21 [INFO] train episode 28: winner = 1, steps = 4\n",
      "00:14:23 [INFO] train episode 29: winner = 1, steps = 4\n",
      "00:14:24 [INFO] train episode 30: winner = -1, steps = 5\n",
      "00:14:24 [INFO] train episode 31: winner = 0, steps = 8\n",
      "00:14:26 [INFO] train episode 32: winner = -1, steps = 5\n",
      "00:14:27 [INFO] train episode 33: winner = -1, steps = 7\n",
      "00:14:28 [INFO] train episode 34: winner = 1, steps = 8\n",
      "00:14:28 [INFO] train episode 35: winner = 1, steps = 4\n",
      "00:14:29 [INFO] train episode 36: winner = 0, steps = 8\n",
      "00:14:32 [INFO] train episode 37: winner = 1, steps = 4\n",
      "00:14:33 [INFO] train episode 38: winner = 1, steps = 4\n",
      "00:14:34 [INFO] train episode 39: winner = -1, steps = 5\n",
      "00:14:38 [INFO] train episode 40: winner = 0, steps = 8\n",
      "00:14:40 [INFO] train episode 41: winner = 0, steps = 8\n",
      "00:14:46 [INFO] train episode 42: winner = 0, steps = 8\n",
      "00:14:49 [INFO] train episode 43: winner = 0, steps = 8\n",
      "00:14:51 [INFO] train episode 44: winner = 1, steps = 4\n",
      "00:14:54 [INFO] train episode 45: winner = 1, steps = 6\n",
      "00:14:57 [INFO] train episode 46: winner = -1, steps = 5\n",
      "00:15:03 [INFO] train episode 47: winner = 1, steps = 4\n",
      "00:15:05 [INFO] train episode 48: winner = 1, steps = 4\n",
      "00:15:09 [INFO] train episode 49: winner = -1, steps = 7\n",
      "00:15:11 [INFO] train episode 50: winner = 1, steps = 4\n",
      "00:15:12 [INFO] train episode 51: winner = 1, steps = 4\n",
      "00:15:12 [INFO] train episode 52: winner = 0, steps = 8\n",
      "00:15:12 [INFO] train episode 53: winner = -1, steps = 5\n",
      "00:15:13 [INFO] train episode 54: winner = 1, steps = 6\n",
      "00:15:13 [INFO] train episode 55: winner = 1, steps = 6\n",
      "00:15:14 [INFO] train episode 56: winner = 1, steps = 6\n",
      "00:15:15 [INFO] train episode 57: winner = 0, steps = 8\n",
      "00:15:17 [INFO] train episode 58: winner = 1, steps = 4\n",
      "00:15:19 [INFO] train episode 59: winner = 0, steps = 8\n",
      "00:15:19 [INFO] train episode 60: winner = 1, steps = 6\n",
      "00:15:22 [INFO] train episode 61: winner = 1, steps = 4\n",
      "00:15:25 [INFO] train episode 62: winner = 1, steps = 8\n",
      "00:15:25 [INFO] train episode 63: winner = 1, steps = 4\n",
      "00:15:25 [INFO] train episode 64: winner = -1, steps = 7\n",
      "00:15:29 [INFO] train episode 65: winner = 0, steps = 8\n",
      "00:15:30 [INFO] train episode 66: winner = 1, steps = 4\n",
      "00:15:35 [INFO] train episode 67: winner = 0, steps = 8\n",
      "00:15:35 [INFO] train episode 68: winner = 1, steps = 4\n",
      "00:15:36 [INFO] train episode 69: winner = 1, steps = 6\n",
      "00:15:39 [INFO] train episode 70: winner = 0, steps = 8\n",
      "00:15:40 [INFO] train episode 71: winner = 1, steps = 4\n",
      "00:15:40 [INFO] train episode 72: winner = 1, steps = 4\n",
      "00:15:45 [INFO] train episode 73: winner = 1, steps = 6\n",
      "00:15:49 [INFO] train episode 74: winner = 1, steps = 6\n",
      "00:15:49 [INFO] train episode 75: winner = 1, steps = 4\n",
      "00:15:52 [INFO] train episode 76: winner = -1, steps = 5\n",
      "00:15:54 [INFO] train episode 77: winner = 1, steps = 4\n",
      "00:15:55 [INFO] train episode 78: winner = 1, steps = 4\n",
      "00:15:56 [INFO] train episode 79: winner = 1, steps = 6\n",
      "00:15:56 [INFO] train episode 80: winner = 1, steps = 6\n",
      "00:15:57 [INFO] train episode 81: winner = -1, steps = 7\n",
      "00:15:58 [INFO] train episode 82: winner = 1, steps = 6\n",
      "00:16:03 [INFO] train episode 83: winner = 1, steps = 6\n",
      "00:16:05 [INFO] train episode 84: winner = 0, steps = 8\n",
      "00:16:06 [INFO] train episode 85: winner = 1, steps = 4\n",
      "00:16:06 [INFO] train episode 86: winner = 1, steps = 6\n",
      "00:16:07 [INFO] train episode 87: winner = 0, steps = 8\n",
      "00:16:08 [INFO] train episode 88: winner = 1, steps = 4\n",
      "00:16:08 [INFO] train episode 89: winner = 0, steps = 8\n",
      "00:16:08 [INFO] train episode 90: winner = 0, steps = 8\n",
      "00:16:10 [INFO] train episode 91: winner = -1, steps = 5\n",
      "00:16:12 [INFO] train episode 92: winner = 0, steps = 8\n",
      "00:16:16 [INFO] train episode 93: winner = 1, steps = 6\n",
      "00:16:16 [INFO] train episode 94: winner = 1, steps = 4\n",
      "00:16:19 [INFO] train episode 95: winner = -1, steps = 5\n",
      "00:16:21 [INFO] train episode 96: winner = 1, steps = 8\n",
      "00:16:21 [INFO] train episode 97: winner = 1, steps = 6\n",
      "00:16:21 [INFO] train episode 98: winner = 1, steps = 4\n",
      "00:16:24 [INFO] train episode 99: winner = 1, steps = 6\n",
      "00:16:28 [INFO] train episode 100: winner = 1, steps = 8\n",
      "00:16:30 [INFO] train episode 101: winner = 1, steps = 4\n",
      "00:16:31 [INFO] train episode 102: winner = 1, steps = 4\n",
      "00:16:32 [INFO] train episode 103: winner = 1, steps = 6\n",
      "00:16:33 [INFO] train episode 104: winner = 0, steps = 8\n",
      "00:16:37 [INFO] train episode 105: winner = 0, steps = 8\n",
      "00:16:40 [INFO] train episode 106: winner = 0, steps = 8\n",
      "00:16:43 [INFO] train episode 107: winner = 0, steps = 8\n",
      "00:16:45 [INFO] train episode 108: winner = 1, steps = 6\n",
      "00:16:45 [INFO] train episode 109: winner = 0, steps = 8\n",
      "00:16:47 [INFO] train episode 110: winner = 1, steps = 6\n",
      "00:16:48 [INFO] train episode 111: winner = 1, steps = 6\n",
      "00:16:49 [INFO] train episode 112: winner = -1, steps = 7\n",
      "00:16:49 [INFO] train episode 113: winner = 1, steps = 6\n",
      "00:16:50 [INFO] train episode 114: winner = 1, steps = 4\n",
      "00:16:51 [INFO] train episode 115: winner = 1, steps = 6\n",
      "00:16:53 [INFO] train episode 116: winner = -1, steps = 5\n",
      "00:16:54 [INFO] train episode 117: winner = 1, steps = 6\n",
      "00:16:54 [INFO] train episode 118: winner = 1, steps = 6\n",
      "00:16:56 [INFO] train episode 119: winner = 1, steps = 6\n",
      "00:16:59 [INFO] train episode 120: winner = 1, steps = 6\n",
      "00:16:59 [INFO] train episode 121: winner = 1, steps = 4\n",
      "00:16:59 [INFO] train episode 122: winner = 1, steps = 4\n",
      "00:17:02 [INFO] train episode 123: winner = -1, steps = 5\n",
      "00:17:03 [INFO] train episode 124: winner = -1, steps = 7\n",
      "00:17:03 [INFO] train episode 125: winner = 1, steps = 6\n",
      "00:17:03 [INFO] train episode 126: winner = 1, steps = 4\n",
      "00:17:03 [INFO] train episode 127: winner = 1, steps = 4\n",
      "00:17:04 [INFO] train episode 128: winner = -1, steps = 7\n",
      "00:17:06 [INFO] train episode 129: winner = 1, steps = 8\n",
      "00:17:06 [INFO] train episode 130: winner = 1, steps = 4\n",
      "00:17:10 [INFO] train episode 131: winner = 0, steps = 8\n",
      "00:17:10 [INFO] train episode 132: winner = 1, steps = 4\n",
      "00:17:10 [INFO] train episode 133: winner = 1, steps = 4\n",
      "00:17:11 [INFO] train episode 134: winner = 1, steps = 6\n",
      "00:17:11 [INFO] train episode 135: winner = 1, steps = 4\n",
      "00:17:11 [INFO] train episode 136: winner = 0, steps = 8\n",
      "00:17:12 [INFO] train episode 137: winner = 1, steps = 6\n",
      "00:17:12 [INFO] train episode 138: winner = 1, steps = 4\n",
      "00:17:13 [INFO] train episode 139: winner = 1, steps = 6\n",
      "00:17:13 [INFO] train episode 140: winner = 1, steps = 6\n",
      "00:17:15 [INFO] train episode 141: winner = -1, steps = 7\n",
      "00:17:16 [INFO] train episode 142: winner = 1, steps = 4\n",
      "00:17:19 [INFO] train episode 143: winner = 0, steps = 8\n",
      "00:17:20 [INFO] train episode 144: winner = 1, steps = 4\n",
      "00:17:21 [INFO] train episode 145: winner = 1, steps = 6\n",
      "00:17:21 [INFO] test episode 145:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:17:31 [INFO] step 0：player 1, action (0, 1)\n",
      "+o+\n",
      "+++\n",
      "+++\n",
      "00:17:40 [INFO] step 1：player -1, action (1, 1)\n",
      "+o+\n",
      "+x+\n",
      "+++\n",
      "00:17:46 [INFO] step 2：player 1, action (2, 0)\n",
      "+o+\n",
      "+x+\n",
      "o++\n",
      "00:17:49 [INFO] step 3：player -1, action (2, 1)\n",
      "+o+\n",
      "+x+\n",
      "ox+\n",
      "00:17:50 [INFO] step 4：player 1, action (2, 2)\n",
      "+o+\n",
      "+x+\n",
      "oxo\n",
      "00:17:51 [INFO] step 5：player -1, action (1, 0)\n",
      "+o+\n",
      "xx+\n",
      "oxo\n",
      "00:17:52 [INFO] step 6：player 1, action (1, 2)\n",
      "+o+\n",
      "xxo\n",
      "oxo\n",
      "00:17:52 [INFO] step 7：player -1, action (0, 2)\n",
      "+ox\n",
      "xxo\n",
      "oxo\n",
      "00:17:52 [INFO] step 8：player 1, action (0, 0)\n",
      "oox\n",
      "xxo\n",
      "oxo\n",
      "00:17:52 [INFO] test episode 145: winner = 0, steps = 8\n",
      "00:18:09 [INFO] train episode 146: winner = 1, steps = 4\n",
      "00:18:23 [INFO] train episode 147: winner = 1, steps = 6\n",
      "00:18:42 [INFO] train episode 148: winner = 0, steps = 8\n",
      "00:18:59 [INFO] train episode 149: winner = 1, steps = 8\n",
      "00:19:12 [INFO] train episode 150: winner = 0, steps = 8\n",
      "00:19:22 [INFO] train episode 151: winner = 0, steps = 8\n",
      "00:19:25 [INFO] train episode 152: winner = 1, steps = 4\n",
      "00:19:39 [INFO] train episode 153: winner = 1, steps = 4\n",
      "00:19:48 [INFO] train episode 154: winner = 1, steps = 6\n",
      "00:19:51 [INFO] train episode 155: winner = 1, steps = 6\n",
      "00:20:00 [INFO] train episode 156: winner = 1, steps = 8\n",
      "00:20:06 [INFO] train episode 157: winner = 1, steps = 6\n",
      "00:20:09 [INFO] train episode 158: winner = 1, steps = 4\n",
      "00:20:12 [INFO] train episode 159: winner = 0, steps = 8\n",
      "00:20:18 [INFO] train episode 160: winner = 0, steps = 8\n",
      "00:20:27 [INFO] train episode 161: winner = 0, steps = 8\n",
      "00:20:34 [INFO] train episode 162: winner = 0, steps = 8\n",
      "00:20:35 [INFO] train episode 163: winner = 1, steps = 4\n",
      "00:20:41 [INFO] train episode 164: winner = 1, steps = 4\n",
      "00:20:47 [INFO] train episode 165: winner = 0, steps = 8\n",
      "00:20:53 [INFO] train episode 166: winner = 0, steps = 8\n",
      "00:20:57 [INFO] train episode 167: winner = -1, steps = 5\n",
      "00:21:02 [INFO] train episode 168: winner = -1, steps = 7\n",
      "00:21:06 [INFO] train episode 169: winner = 1, steps = 8\n",
      "00:21:12 [INFO] train episode 170: winner = 1, steps = 6\n",
      "00:21:15 [INFO] train episode 171: winner = 1, steps = 6\n",
      "00:21:18 [INFO] train episode 172: winner = 1, steps = 6\n",
      "00:21:21 [INFO] train episode 173: winner = 1, steps = 4\n",
      "00:21:23 [INFO] train episode 174: winner = 1, steps = 4\n",
      "00:21:27 [INFO] train episode 175: winner = 0, steps = 8\n",
      "00:21:29 [INFO] train episode 176: winner = 1, steps = 6\n",
      "00:21:30 [INFO] train episode 177: winner = 1, steps = 6\n",
      "00:21:36 [INFO] train episode 178: winner = 0, steps = 8\n",
      "00:21:37 [INFO] train episode 179: winner = 0, steps = 8\n",
      "00:21:39 [INFO] train episode 180: winner = -1, steps = 7\n",
      "00:21:42 [INFO] train episode 181: winner = 1, steps = 6\n",
      "00:21:45 [INFO] train episode 182: winner = 1, steps = 6\n",
      "00:21:45 [INFO] train episode 183: winner = 0, steps = 8\n",
      "00:21:49 [INFO] train episode 184: winner = 1, steps = 6\n",
      "00:21:54 [INFO] train episode 185: winner = 1, steps = 6\n",
      "00:21:55 [INFO] train episode 186: winner = 1, steps = 4\n",
      "00:21:56 [INFO] train episode 187: winner = 1, steps = 4\n",
      "00:21:57 [INFO] train episode 188: winner = 0, steps = 8\n",
      "00:22:01 [INFO] train episode 189: winner = -1, steps = 5\n",
      "00:22:03 [INFO] train episode 190: winner = 1, steps = 8\n",
      "00:22:04 [INFO] train episode 191: winner = 0, steps = 8\n",
      "00:22:06 [INFO] train episode 192: winner = 0, steps = 8\n",
      "00:22:07 [INFO] train episode 193: winner = 1, steps = 6\n",
      "00:22:12 [INFO] train episode 194: winner = 0, steps = 8\n",
      "00:22:12 [INFO] train episode 195: winner = 0, steps = 8\n",
      "00:22:15 [INFO] train episode 196: winner = 1, steps = 6\n",
      "00:22:19 [INFO] train episode 197: winner = -1, steps = 5\n",
      "00:22:20 [INFO] train episode 198: winner = 1, steps = 6\n",
      "00:22:21 [INFO] train episode 199: winner = 1, steps = 4\n",
      "00:22:21 [INFO] train episode 200: winner = 0, steps = 8\n",
      "00:22:23 [INFO] train episode 201: winner = 0, steps = 8\n",
      "00:22:23 [INFO] train episode 202: winner = 0, steps = 8\n",
      "00:22:23 [INFO] train episode 203: winner = 1, steps = 4\n",
      "00:22:27 [INFO] train episode 204: winner = 1, steps = 6\n",
      "00:22:30 [INFO] train episode 205: winner = 1, steps = 8\n",
      "00:22:31 [INFO] train episode 206: winner = 0, steps = 8\n",
      "00:22:33 [INFO] train episode 207: winner = -1, steps = 7\n",
      "00:22:37 [INFO] train episode 208: winner = 0, steps = 8\n",
      "00:22:41 [INFO] train episode 209: winner = -1, steps = 5\n",
      "00:22:43 [INFO] train episode 210: winner = 1, steps = 6\n",
      "00:22:45 [INFO] train episode 211: winner = 1, steps = 6\n",
      "00:22:50 [INFO] train episode 212: winner = 0, steps = 8\n",
      "00:22:51 [INFO] train episode 213: winner = -1, steps = 5\n",
      "00:22:54 [INFO] train episode 214: winner = -1, steps = 7\n",
      "00:22:58 [INFO] train episode 215: winner = 0, steps = 8\n",
      "00:23:04 [INFO] train episode 216: winner = 0, steps = 8\n",
      "00:23:07 [INFO] train episode 217: winner = 1, steps = 4\n",
      "00:23:07 [INFO] train episode 218: winner = 1, steps = 4\n",
      "00:23:08 [INFO] train episode 219: winner = 0, steps = 8\n",
      "00:23:10 [INFO] train episode 220: winner = 0, steps = 8\n",
      "00:23:16 [INFO] train episode 221: winner = 0, steps = 8\n",
      "00:23:19 [INFO] train episode 222: winner = 1, steps = 8\n",
      "00:23:21 [INFO] train episode 223: winner = 0, steps = 8\n",
      "00:23:24 [INFO] train episode 224: winner = 1, steps = 6\n",
      "00:23:24 [INFO] train episode 225: winner = 1, steps = 4\n",
      "00:23:25 [INFO] train episode 226: winner = 1, steps = 4\n",
      "00:23:26 [INFO] train episode 227: winner = 1, steps = 4\n",
      "00:23:26 [INFO] train episode 228: winner = 0, steps = 8\n",
      "00:23:26 [INFO] train episode 229: winner = 1, steps = 4\n",
      "00:23:27 [INFO] train episode 230: winner = 1, steps = 6\n",
      "00:23:33 [INFO] train episode 231: winner = 0, steps = 8\n",
      "00:23:33 [INFO] train episode 232: winner = 1, steps = 6\n",
      "00:23:36 [INFO] train episode 233: winner = 0, steps = 8\n",
      "00:23:36 [INFO] train episode 234: winner = 1, steps = 4\n",
      "00:23:38 [INFO] train episode 235: winner = 1, steps = 4\n",
      "00:23:38 [INFO] train episode 236: winner = 1, steps = 6\n",
      "00:23:42 [INFO] train episode 237: winner = 0, steps = 8\n",
      "00:23:43 [INFO] train episode 238: winner = 1, steps = 4\n",
      "00:23:50 [INFO] train episode 239: winner = 0, steps = 8\n",
      "00:23:56 [INFO] train episode 240: winner = 1, steps = 6\n",
      "00:23:58 [INFO] train episode 241: winner = 0, steps = 8\n",
      "00:24:03 [INFO] train episode 242: winner = 1, steps = 8\n",
      "00:24:06 [INFO] train episode 243: winner = 1, steps = 6\n",
      "00:24:09 [INFO] train episode 244: winner = 1, steps = 8\n",
      "00:24:12 [INFO] train episode 245: winner = 1, steps = 4\n",
      "00:24:12 [INFO] train episode 246: winner = 0, steps = 8\n",
      "00:24:22 [INFO] train episode 247: winner = 0, steps = 8\n",
      "00:24:32 [INFO] train episode 248: winner = 1, steps = 6\n",
      "00:24:33 [INFO] train episode 249: winner = 0, steps = 8\n",
      "00:24:34 [INFO] train episode 250: winner = 1, steps = 4\n",
      "00:24:35 [INFO] train episode 251: winner = 1, steps = 8\n",
      "00:24:36 [INFO] train episode 252: winner = -1, steps = 7\n",
      "00:24:36 [INFO] train episode 253: winner = 0, steps = 8\n",
      "00:24:37 [INFO] train episode 254: winner = -1, steps = 5\n",
      "00:24:38 [INFO] train episode 255: winner = 0, steps = 8\n",
      "00:24:39 [INFO] train episode 256: winner = -1, steps = 7\n",
      "00:24:40 [INFO] train episode 257: winner = 1, steps = 6\n",
      "00:24:40 [INFO] train episode 258: winner = 0, steps = 8\n",
      "00:24:42 [INFO] train episode 259: winner = 1, steps = 8\n",
      "00:24:44 [INFO] train episode 260: winner = 1, steps = 6\n",
      "00:24:45 [INFO] train episode 261: winner = 1, steps = 4\n",
      "00:24:46 [INFO] train episode 262: winner = 0, steps = 8\n",
      "00:24:47 [INFO] train episode 263: winner = -1, steps = 7\n",
      "00:24:47 [INFO] train episode 264: winner = 1, steps = 4\n",
      "00:24:48 [INFO] train episode 265: winner = 1, steps = 8\n",
      "00:24:48 [INFO] train episode 266: winner = 0, steps = 8\n",
      "00:24:50 [INFO] train episode 267: winner = 1, steps = 6\n",
      "00:24:50 [INFO] train episode 268: winner = 1, steps = 4\n",
      "00:24:51 [INFO] train episode 269: winner = 1, steps = 6\n",
      "00:24:53 [INFO] train episode 270: winner = 0, steps = 8\n",
      "00:24:55 [INFO] train episode 271: winner = 1, steps = 8\n",
      "00:24:55 [INFO] train episode 272: winner = 1, steps = 6\n",
      "00:24:56 [INFO] train episode 273: winner = 1, steps = 6\n",
      "00:24:58 [INFO] train episode 274: winner = 0, steps = 8\n",
      "00:24:58 [INFO] train episode 275: winner = 1, steps = 4\n",
      "00:25:00 [INFO] train episode 276: winner = 1, steps = 6\n",
      "00:25:00 [INFO] train episode 277: winner = 1, steps = 6\n",
      "00:25:02 [INFO] train episode 278: winner = 1, steps = 4\n",
      "00:25:05 [INFO] train episode 279: winner = 0, steps = 8\n",
      "00:25:05 [INFO] test episode 279:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:25:14 [INFO] step 0：player 1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+o+\n",
      "00:25:21 [INFO] step 1：player -1, action (1, 1)\n",
      "+++\n",
      "+x+\n",
      "+o+\n",
      "00:25:29 [INFO] step 2：player 1, action (0, 1)\n",
      "+o+\n",
      "+x+\n",
      "+o+\n",
      "00:25:31 [INFO] step 3：player -1, action (1, 0)\n",
      "+o+\n",
      "xx+\n",
      "+o+\n",
      "00:25:32 [INFO] step 4：player 1, action (0, 0)\n",
      "oo+\n",
      "xx+\n",
      "+o+\n",
      "00:25:32 [INFO] step 5：player -1, action (0, 2)\n",
      "oox\n",
      "xx+\n",
      "+o+\n",
      "00:25:32 [INFO] step 6：player 1, action (2, 0)\n",
      "oox\n",
      "xx+\n",
      "oo+\n",
      "00:25:32 [INFO] step 7：player -1, action (1, 2)\n",
      "oox\n",
      "xxx\n",
      "oo+\n",
      "00:25:32 [INFO] test episode 279: winner = -1, steps = 7\n",
      "00:25:51 [INFO] train episode 280: winner = 1, steps = 6\n",
      "00:26:07 [INFO] train episode 281: winner = -1, steps = 7\n",
      "00:26:19 [INFO] train episode 282: winner = 0, steps = 8\n",
      "00:26:24 [INFO] train episode 283: winner = 0, steps = 8\n",
      "00:26:36 [INFO] train episode 284: winner = -1, steps = 7\n",
      "00:26:44 [INFO] train episode 285: winner = -1, steps = 5\n",
      "00:26:54 [INFO] train episode 286: winner = 1, steps = 6\n",
      "00:27:01 [INFO] train episode 287: winner = 1, steps = 4\n",
      "00:27:04 [INFO] train episode 288: winner = 0, steps = 8\n",
      "00:27:08 [INFO] train episode 289: winner = -1, steps = 5\n",
      "00:27:17 [INFO] train episode 290: winner = 1, steps = 8\n",
      "00:27:20 [INFO] train episode 291: winner = -1, steps = 7\n",
      "00:27:22 [INFO] train episode 292: winner = 0, steps = 8\n",
      "00:27:24 [INFO] train episode 293: winner = 0, steps = 8\n",
      "00:27:27 [INFO] train episode 294: winner = -1, steps = 5\n",
      "00:27:35 [INFO] train episode 295: winner = -1, steps = 5\n",
      "00:27:38 [INFO] train episode 296: winner = 0, steps = 8\n",
      "00:27:42 [INFO] train episode 297: winner = 1, steps = 4\n",
      "00:27:45 [INFO] train episode 298: winner = 0, steps = 8\n",
      "00:27:47 [INFO] train episode 299: winner = 0, steps = 8\n",
      "00:27:50 [INFO] train episode 300: winner = 1, steps = 4\n",
      "00:27:53 [INFO] train episode 301: winner = 1, steps = 6\n",
      "00:27:57 [INFO] train episode 302: winner = 1, steps = 6\n",
      "00:28:00 [INFO] train episode 303: winner = 0, steps = 8\n",
      "00:28:04 [INFO] train episode 304: winner = 1, steps = 8\n",
      "00:28:07 [INFO] train episode 305: winner = 1, steps = 6\n",
      "00:28:14 [INFO] train episode 306: winner = -1, steps = 5\n",
      "00:28:17 [INFO] train episode 307: winner = 1, steps = 6\n",
      "00:28:18 [INFO] train episode 308: winner = 1, steps = 4\n",
      "00:28:24 [INFO] train episode 309: winner = 0, steps = 8\n",
      "00:28:26 [INFO] train episode 310: winner = 1, steps = 6\n",
      "00:28:29 [INFO] train episode 311: winner = 1, steps = 6\n",
      "00:28:37 [INFO] train episode 312: winner = 1, steps = 4\n",
      "00:28:41 [INFO] train episode 313: winner = 1, steps = 6\n",
      "00:28:44 [INFO] train episode 314: winner = -1, steps = 7\n",
      "00:28:48 [INFO] train episode 315: winner = -1, steps = 5\n",
      "00:28:52 [INFO] train episode 316: winner = 0, steps = 8\n",
      "00:28:52 [INFO] train episode 317: winner = -1, steps = 5\n",
      "00:28:53 [INFO] train episode 318: winner = 1, steps = 6\n",
      "00:28:55 [INFO] train episode 319: winner = 1, steps = 6\n",
      "00:29:01 [INFO] train episode 320: winner = 1, steps = 4\n",
      "00:29:01 [INFO] train episode 321: winner = -1, steps = 7\n",
      "00:29:02 [INFO] train episode 322: winner = 1, steps = 4\n",
      "00:29:04 [INFO] train episode 323: winner = 1, steps = 6\n",
      "00:29:09 [INFO] train episode 324: winner = 1, steps = 6\n",
      "00:29:12 [INFO] train episode 325: winner = 1, steps = 4\n",
      "00:29:18 [INFO] train episode 326: winner = 0, steps = 8\n",
      "00:29:20 [INFO] train episode 327: winner = 1, steps = 6\n",
      "00:29:22 [INFO] train episode 328: winner = 0, steps = 8\n",
      "00:29:29 [INFO] train episode 329: winner = 1, steps = 6\n",
      "00:29:31 [INFO] train episode 330: winner = 1, steps = 6\n",
      "00:29:37 [INFO] train episode 331: winner = 0, steps = 8\n",
      "00:29:42 [INFO] train episode 332: winner = 0, steps = 8\n",
      "00:29:44 [INFO] train episode 333: winner = 1, steps = 6\n",
      "00:29:49 [INFO] train episode 334: winner = 1, steps = 8\n",
      "00:29:50 [INFO] train episode 335: winner = -1, steps = 7\n",
      "00:29:50 [INFO] train episode 336: winner = 0, steps = 8\n",
      "00:29:52 [INFO] train episode 337: winner = 1, steps = 4\n",
      "00:29:53 [INFO] train episode 338: winner = 1, steps = 6\n",
      "00:29:54 [INFO] train episode 339: winner = -1, steps = 5\n",
      "00:29:55 [INFO] train episode 340: winner = 1, steps = 6\n",
      "00:29:56 [INFO] train episode 341: winner = 1, steps = 4\n",
      "00:29:56 [INFO] train episode 342: winner = 1, steps = 6\n",
      "00:30:00 [INFO] train episode 343: winner = -1, steps = 5\n",
      "00:30:00 [INFO] train episode 344: winner = 1, steps = 6\n",
      "00:30:02 [INFO] train episode 345: winner = 0, steps = 8\n",
      "00:30:04 [INFO] train episode 346: winner = 0, steps = 8\n",
      "00:30:06 [INFO] train episode 347: winner = 1, steps = 6\n",
      "00:30:12 [INFO] train episode 348: winner = 1, steps = 6\n",
      "00:30:16 [INFO] train episode 349: winner = 0, steps = 8\n",
      "00:30:20 [INFO] train episode 350: winner = 0, steps = 8\n",
      "00:30:20 [INFO] train episode 351: winner = 1, steps = 4\n",
      "00:30:23 [INFO] train episode 352: winner = 1, steps = 6\n",
      "00:30:26 [INFO] train episode 353: winner = 1, steps = 6\n",
      "00:30:29 [INFO] train episode 354: winner = 0, steps = 8\n",
      "00:30:29 [INFO] train episode 355: winner = 1, steps = 6\n",
      "00:30:31 [INFO] train episode 356: winner = 0, steps = 8\n",
      "00:30:35 [INFO] train episode 357: winner = 0, steps = 8\n",
      "00:30:35 [INFO] train episode 358: winner = 1, steps = 4\n",
      "00:30:36 [INFO] train episode 359: winner = -1, steps = 7\n",
      "00:30:37 [INFO] train episode 360: winner = 1, steps = 4\n",
      "00:30:42 [INFO] train episode 361: winner = 1, steps = 6\n",
      "00:30:46 [INFO] train episode 362: winner = -1, steps = 7\n",
      "00:30:47 [INFO] train episode 363: winner = -1, steps = 5\n",
      "00:30:49 [INFO] train episode 364: winner = 1, steps = 4\n",
      "00:30:51 [INFO] train episode 365: winner = 1, steps = 4\n",
      "00:30:51 [INFO] train episode 366: winner = 1, steps = 6\n",
      "00:30:51 [INFO] train episode 367: winner = -1, steps = 7\n",
      "00:30:52 [INFO] train episode 368: winner = 1, steps = 4\n",
      "00:30:52 [INFO] train episode 369: winner = 0, steps = 8\n",
      "00:30:52 [INFO] train episode 370: winner = 1, steps = 4\n",
      "00:30:52 [INFO] train episode 371: winner = 1, steps = 6\n",
      "00:30:53 [INFO] train episode 372: winner = 1, steps = 6\n",
      "00:30:55 [INFO] train episode 373: winner = 0, steps = 8\n",
      "00:30:56 [INFO] train episode 374: winner = 0, steps = 8\n",
      "00:30:58 [INFO] train episode 375: winner = 1, steps = 4\n",
      "00:31:03 [INFO] train episode 376: winner = 1, steps = 6\n",
      "00:31:04 [INFO] train episode 377: winner = 1, steps = 6\n",
      "00:31:05 [INFO] train episode 378: winner = 0, steps = 8\n",
      "00:31:05 [INFO] train episode 379: winner = 1, steps = 4\n",
      "00:31:05 [INFO] train episode 380: winner = 0, steps = 8\n",
      "00:31:05 [INFO] train episode 381: winner = 1, steps = 6\n",
      "00:31:08 [INFO] train episode 382: winner = 0, steps = 8\n",
      "00:31:08 [INFO] train episode 383: winner = 1, steps = 6\n",
      "00:31:11 [INFO] train episode 384: winner = 0, steps = 8\n",
      "00:31:13 [INFO] train episode 385: winner = -1, steps = 5\n",
      "00:31:13 [INFO] train episode 386: winner = 0, steps = 8\n",
      "00:31:13 [INFO] train episode 387: winner = 0, steps = 8\n",
      "00:31:13 [INFO] train episode 388: winner = 1, steps = 4\n",
      "00:31:16 [INFO] train episode 389: winner = 1, steps = 6\n",
      "00:31:18 [INFO] train episode 390: winner = 1, steps = 6\n",
      "00:31:21 [INFO] train episode 391: winner = 0, steps = 8\n",
      "00:31:24 [INFO] train episode 392: winner = 1, steps = 4\n",
      "00:31:26 [INFO] train episode 393: winner = 0, steps = 8\n",
      "00:31:30 [INFO] train episode 394: winner = 1, steps = 6\n",
      "00:31:32 [INFO] train episode 395: winner = 0, steps = 8\n",
      "00:31:33 [INFO] train episode 396: winner = 1, steps = 6\n",
      "00:31:34 [INFO] train episode 397: winner = 1, steps = 6\n",
      "00:31:35 [INFO] train episode 398: winner = 0, steps = 8\n",
      "00:31:35 [INFO] train episode 399: winner = 1, steps = 4\n",
      "00:31:36 [INFO] train episode 400: winner = 0, steps = 8\n",
      "00:31:36 [INFO] train episode 401: winner = 1, steps = 4\n",
      "00:31:36 [INFO] train episode 402: winner = -1, steps = 5\n",
      "00:31:38 [INFO] train episode 403: winner = 1, steps = 6\n",
      "00:31:40 [INFO] train episode 404: winner = -1, steps = 5\n",
      "00:31:40 [INFO] train episode 405: winner = 0, steps = 8\n",
      "00:31:43 [INFO] train episode 406: winner = 1, steps = 6\n",
      "00:31:44 [INFO] train episode 407: winner = 1, steps = 6\n",
      "00:31:45 [INFO] train episode 408: winner = -1, steps = 5\n",
      "00:31:46 [INFO] train episode 409: winner = 0, steps = 8\n",
      "00:31:47 [INFO] train episode 410: winner = 0, steps = 8\n",
      "00:31:47 [INFO] train episode 411: winner = 1, steps = 6\n",
      "00:31:48 [INFO] train episode 412: winner = 1, steps = 6\n",
      "00:31:49 [INFO] train episode 413: winner = 1, steps = 4\n",
      "00:31:52 [INFO] train episode 414: winner = 0, steps = 8\n",
      "00:31:53 [INFO] train episode 415: winner = -1, steps = 7\n",
      "00:31:54 [INFO] train episode 416: winner = 0, steps = 8\n",
      "00:31:56 [INFO] train episode 417: winner = 1, steps = 4\n",
      "00:31:56 [INFO] test episode 417:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:32:05 [INFO] step 0：player 1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+o+\n",
      "00:32:13 [INFO] step 1：player -1, action (1, 2)\n",
      "+++\n",
      "++x\n",
      "+o+\n",
      "00:32:18 [INFO] step 2：player 1, action (1, 1)\n",
      "+++\n",
      "+ox\n",
      "+o+\n",
      "00:32:18 [INFO] step 3：player -1, action (0, 1)\n",
      "+x+\n",
      "+ox\n",
      "+o+\n",
      "00:32:19 [INFO] step 4：player 1, action (2, 2)\n",
      "+x+\n",
      "+ox\n",
      "+oo\n",
      "00:32:19 [INFO] step 5：player -1, action (0, 2)\n",
      "+xx\n",
      "+ox\n",
      "+oo\n",
      "00:32:19 [INFO] step 6：player 1, action (0, 0)\n",
      "oxx\n",
      "+ox\n",
      "+oo\n",
      "00:32:19 [INFO] test episode 417: winner = 1, steps = 6\n",
      "00:32:37 [INFO] train episode 418: winner = 1, steps = 8\n",
      "00:32:58 [INFO] train episode 419: winner = 1, steps = 6\n",
      "00:33:12 [INFO] train episode 420: winner = 0, steps = 8\n",
      "00:33:25 [INFO] train episode 421: winner = 1, steps = 4\n",
      "00:33:32 [INFO] train episode 422: winner = 1, steps = 6\n",
      "00:33:43 [INFO] train episode 423: winner = 1, steps = 6\n",
      "00:33:44 [INFO] train episode 424: winner = 1, steps = 4\n",
      "00:33:49 [INFO] train episode 425: winner = 1, steps = 4\n",
      "00:33:58 [INFO] train episode 426: winner = 0, steps = 8\n",
      "00:34:01 [INFO] train episode 427: winner = 1, steps = 8\n",
      "00:34:06 [INFO] train episode 428: winner = -1, steps = 7\n",
      "00:34:07 [INFO] train episode 429: winner = 1, steps = 4\n",
      "00:34:11 [INFO] train episode 430: winner = 0, steps = 8\n",
      "00:34:15 [INFO] train episode 431: winner = 0, steps = 8\n",
      "00:34:17 [INFO] train episode 432: winner = 1, steps = 6\n",
      "00:34:24 [INFO] train episode 433: winner = 1, steps = 4\n",
      "00:34:29 [INFO] train episode 434: winner = 1, steps = 4\n",
      "00:34:32 [INFO] train episode 435: winner = 1, steps = 4\n",
      "00:34:38 [INFO] train episode 436: winner = 0, steps = 8\n",
      "00:34:49 [INFO] train episode 437: winner = 1, steps = 4\n",
      "00:34:56 [INFO] train episode 438: winner = -1, steps = 5\n",
      "00:35:04 [INFO] train episode 439: winner = 0, steps = 8\n",
      "00:35:05 [INFO] train episode 440: winner = 1, steps = 4\n",
      "00:35:07 [INFO] train episode 441: winner = 1, steps = 4\n",
      "00:35:09 [INFO] train episode 442: winner = -1, steps = 5\n",
      "00:35:15 [INFO] train episode 443: winner = 1, steps = 4\n",
      "00:35:19 [INFO] train episode 444: winner = 1, steps = 4\n",
      "00:35:21 [INFO] train episode 445: winner = -1, steps = 7\n",
      "00:35:23 [INFO] train episode 446: winner = -1, steps = 7\n",
      "00:35:28 [INFO] train episode 447: winner = -1, steps = 5\n",
      "00:35:28 [INFO] train episode 448: winner = 1, steps = 4\n",
      "00:35:34 [INFO] train episode 449: winner = -1, steps = 7\n",
      "00:35:35 [INFO] train episode 450: winner = 1, steps = 6\n",
      "00:35:39 [INFO] train episode 451: winner = 1, steps = 6\n",
      "00:35:41 [INFO] train episode 452: winner = 1, steps = 4\n",
      "00:35:42 [INFO] train episode 453: winner = -1, steps = 5\n",
      "00:35:45 [INFO] train episode 454: winner = 0, steps = 8\n",
      "00:35:47 [INFO] train episode 455: winner = 1, steps = 6\n",
      "00:35:47 [INFO] train episode 456: winner = 1, steps = 4\n",
      "00:35:49 [INFO] train episode 457: winner = 1, steps = 6\n",
      "00:35:52 [INFO] train episode 458: winner = 0, steps = 8\n",
      "00:35:55 [INFO] train episode 459: winner = -1, steps = 7\n",
      "00:35:59 [INFO] train episode 460: winner = -1, steps = 7\n",
      "00:36:02 [INFO] train episode 461: winner = -1, steps = 7\n",
      "00:36:02 [INFO] train episode 462: winner = 0, steps = 8\n",
      "00:36:07 [INFO] train episode 463: winner = 0, steps = 8\n",
      "00:36:08 [INFO] train episode 464: winner = 0, steps = 8\n",
      "00:36:09 [INFO] train episode 465: winner = 0, steps = 8\n",
      "00:36:12 [INFO] train episode 466: winner = 1, steps = 4\n",
      "00:36:15 [INFO] train episode 467: winner = 1, steps = 6\n",
      "00:36:17 [INFO] train episode 468: winner = -1, steps = 7\n",
      "00:36:20 [INFO] train episode 469: winner = 0, steps = 8\n",
      "00:36:24 [INFO] train episode 470: winner = 1, steps = 4\n",
      "00:36:24 [INFO] train episode 471: winner = 1, steps = 6\n",
      "00:36:28 [INFO] train episode 472: winner = 0, steps = 8\n",
      "00:36:29 [INFO] train episode 473: winner = 1, steps = 4\n",
      "00:36:30 [INFO] train episode 474: winner = -1, steps = 7\n",
      "00:36:32 [INFO] train episode 475: winner = 1, steps = 4\n",
      "00:36:34 [INFO] train episode 476: winner = 1, steps = 6\n",
      "00:36:34 [INFO] train episode 477: winner = 1, steps = 4\n",
      "00:36:38 [INFO] train episode 478: winner = 0, steps = 8\n",
      "00:36:38 [INFO] train episode 479: winner = 1, steps = 4\n",
      "00:36:40 [INFO] train episode 480: winner = 0, steps = 8\n",
      "00:36:42 [INFO] train episode 481: winner = 1, steps = 6\n",
      "00:36:42 [INFO] train episode 482: winner = 0, steps = 8\n",
      "00:36:45 [INFO] train episode 483: winner = 1, steps = 6\n",
      "00:36:45 [INFO] train episode 484: winner = 1, steps = 8\n",
      "00:36:45 [INFO] train episode 485: winner = 1, steps = 6\n",
      "00:36:48 [INFO] train episode 486: winner = 1, steps = 6\n",
      "00:36:50 [INFO] train episode 487: winner = 0, steps = 8\n",
      "00:36:53 [INFO] train episode 488: winner = 0, steps = 8\n",
      "00:36:55 [INFO] train episode 489: winner = 0, steps = 8\n",
      "00:36:56 [INFO] train episode 490: winner = 1, steps = 6\n",
      "00:37:00 [INFO] train episode 491: winner = 0, steps = 8\n",
      "00:37:03 [INFO] train episode 492: winner = 0, steps = 8\n",
      "00:37:06 [INFO] train episode 493: winner = 0, steps = 8\n",
      "00:37:07 [INFO] train episode 494: winner = 1, steps = 6\n",
      "00:37:08 [INFO] train episode 495: winner = 1, steps = 6\n",
      "00:37:10 [INFO] train episode 496: winner = -1, steps = 5\n",
      "00:37:11 [INFO] train episode 497: winner = 1, steps = 4\n",
      "00:37:11 [INFO] train episode 498: winner = 1, steps = 4\n",
      "00:37:12 [INFO] train episode 499: winner = 1, steps = 6\n",
      "00:37:12 [INFO] train episode 500: winner = 1, steps = 6\n",
      "00:37:15 [INFO] train episode 501: winner = 0, steps = 8\n",
      "00:37:16 [INFO] train episode 502: winner = 1, steps = 6\n",
      "00:37:19 [INFO] train episode 503: winner = 1, steps = 6\n",
      "00:37:20 [INFO] train episode 504: winner = 1, steps = 4\n",
      "00:37:22 [INFO] train episode 505: winner = 1, steps = 4\n",
      "00:37:24 [INFO] train episode 506: winner = 1, steps = 6\n",
      "00:37:24 [INFO] train episode 507: winner = 1, steps = 6\n",
      "00:37:24 [INFO] train episode 508: winner = 1, steps = 4\n",
      "00:37:24 [INFO] train episode 509: winner = 1, steps = 4\n",
      "00:37:31 [INFO] train episode 510: winner = 0, steps = 8\n",
      "00:37:32 [INFO] train episode 511: winner = -1, steps = 5\n",
      "00:37:33 [INFO] train episode 512: winner = 1, steps = 6\n",
      "00:37:34 [INFO] train episode 513: winner = 1, steps = 4\n",
      "00:37:34 [INFO] train episode 514: winner = 1, steps = 4\n",
      "00:37:35 [INFO] train episode 515: winner = 1, steps = 4\n",
      "00:37:36 [INFO] train episode 516: winner = 1, steps = 8\n",
      "00:37:39 [INFO] train episode 517: winner = 0, steps = 8\n",
      "00:37:40 [INFO] train episode 518: winner = 0, steps = 8\n",
      "00:37:43 [INFO] train episode 519: winner = 1, steps = 6\n",
      "00:37:45 [INFO] train episode 520: winner = 0, steps = 8\n",
      "00:37:47 [INFO] train episode 521: winner = 1, steps = 4\n",
      "00:37:48 [INFO] train episode 522: winner = 1, steps = 6\n",
      "00:37:49 [INFO] train episode 523: winner = 1, steps = 6\n",
      "00:37:51 [INFO] train episode 524: winner = 0, steps = 8\n",
      "00:37:51 [INFO] train episode 525: winner = 1, steps = 4\n",
      "00:37:53 [INFO] train episode 526: winner = 0, steps = 8\n",
      "00:37:54 [INFO] train episode 527: winner = 1, steps = 4\n",
      "00:37:55 [INFO] train episode 528: winner = 1, steps = 6\n",
      "00:37:55 [INFO] train episode 529: winner = 0, steps = 8\n",
      "00:37:58 [INFO] train episode 530: winner = -1, steps = 7\n",
      "00:37:59 [INFO] train episode 531: winner = 1, steps = 6\n",
      "00:38:00 [INFO] train episode 532: winner = 0, steps = 8\n",
      "00:38:04 [INFO] train episode 533: winner = -1, steps = 5\n",
      "00:38:05 [INFO] train episode 534: winner = 1, steps = 6\n",
      "00:38:05 [INFO] train episode 535: winner = 1, steps = 4\n",
      "00:38:05 [INFO] train episode 536: winner = 1, steps = 6\n",
      "00:38:05 [INFO] train episode 537: winner = 1, steps = 4\n",
      "00:38:06 [INFO] train episode 538: winner = 1, steps = 6\n",
      "00:38:07 [INFO] train episode 539: winner = 1, steps = 6\n",
      "00:38:08 [INFO] train episode 540: winner = 0, steps = 8\n",
      "00:38:08 [INFO] train episode 541: winner = 0, steps = 8\n",
      "00:38:08 [INFO] train episode 542: winner = 1, steps = 6\n",
      "00:38:09 [INFO] train episode 543: winner = 1, steps = 8\n",
      "00:38:12 [INFO] train episode 544: winner = 1, steps = 6\n",
      "00:38:12 [INFO] train episode 545: winner = -1, steps = 5\n",
      "00:38:12 [INFO] train episode 546: winner = 1, steps = 4\n",
      "00:38:12 [INFO] train episode 547: winner = 0, steps = 8\n",
      "00:38:13 [INFO] train episode 548: winner = 1, steps = 4\n",
      "00:38:13 [INFO] train episode 549: winner = 1, steps = 6\n",
      "00:38:14 [INFO] train episode 550: winner = 1, steps = 8\n",
      "00:38:16 [INFO] train episode 551: winner = 1, steps = 4\n",
      "00:38:16 [INFO] train episode 552: winner = 0, steps = 8\n",
      "00:38:17 [INFO] train episode 553: winner = 1, steps = 4\n",
      "00:38:20 [INFO] train episode 554: winner = 1, steps = 6\n",
      "00:38:20 [INFO] train episode 555: winner = 1, steps = 4\n",
      "00:38:20 [INFO] train episode 556: winner = 0, steps = 8\n",
      "00:38:20 [INFO] train episode 557: winner = -1, steps = 5\n",
      "00:38:20 [INFO] train episode 558: winner = 0, steps = 8\n",
      "00:38:21 [INFO] train episode 559: winner = 0, steps = 8\n",
      "00:38:21 [INFO] test episode 559:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:38:30 [INFO] step 0：player 1, action (0, 2)\n",
      "++o\n",
      "+++\n",
      "+++\n",
      "00:38:38 [INFO] step 1：player -1, action (1, 0)\n",
      "++o\n",
      "x++\n",
      "+++\n",
      "00:38:44 [INFO] step 2：player 1, action (1, 2)\n",
      "++o\n",
      "x+o\n",
      "+++\n",
      "00:38:46 [INFO] step 3：player -1, action (2, 2)\n",
      "++o\n",
      "x+o\n",
      "++x\n",
      "00:38:47 [INFO] step 4：player 1, action (0, 0)\n",
      "o+o\n",
      "x+o\n",
      "++x\n",
      "00:38:47 [INFO] step 5：player -1, action (2, 0)\n",
      "o+o\n",
      "x+o\n",
      "x+x\n",
      "00:38:47 [INFO] step 6：player 1, action (0, 1)\n",
      "ooo\n",
      "x+o\n",
      "x+x\n",
      "00:38:47 [INFO] test episode 559: winner = 1, steps = 6\n",
      "00:38:59 [INFO] train episode 560: winner = 1, steps = 4\n",
      "00:39:05 [INFO] train episode 561: winner = 1, steps = 6\n",
      "00:39:23 [INFO] train episode 562: winner = 1, steps = 6\n",
      "00:39:38 [INFO] train episode 563: winner = 1, steps = 6\n",
      "00:39:42 [INFO] train episode 564: winner = 1, steps = 6\n",
      "00:39:54 [INFO] train episode 565: winner = 1, steps = 6\n",
      "00:40:02 [INFO] train episode 566: winner = 0, steps = 8\n",
      "00:40:12 [INFO] train episode 567: winner = 1, steps = 4\n",
      "00:40:28 [INFO] train episode 568: winner = 0, steps = 8\n",
      "00:40:39 [INFO] train episode 569: winner = 0, steps = 8\n",
      "00:40:50 [INFO] train episode 570: winner = 1, steps = 6\n",
      "00:40:59 [INFO] train episode 571: winner = 0, steps = 8\n",
      "00:41:04 [INFO] train episode 572: winner = 1, steps = 4\n",
      "00:41:08 [INFO] train episode 573: winner = 0, steps = 8\n",
      "00:41:10 [INFO] train episode 574: winner = -1, steps = 7\n",
      "00:41:11 [INFO] train episode 575: winner = 1, steps = 6\n",
      "00:41:18 [INFO] train episode 576: winner = 1, steps = 6\n",
      "00:41:25 [INFO] train episode 577: winner = 0, steps = 8\n",
      "00:41:26 [INFO] train episode 578: winner = 1, steps = 6\n",
      "00:41:35 [INFO] train episode 579: winner = 1, steps = 6\n",
      "00:41:36 [INFO] train episode 580: winner = 1, steps = 6\n",
      "00:41:42 [INFO] train episode 581: winner = 1, steps = 8\n",
      "00:41:44 [INFO] train episode 582: winner = 1, steps = 4\n",
      "00:41:51 [INFO] train episode 583: winner = 1, steps = 8\n",
      "00:41:54 [INFO] train episode 584: winner = 1, steps = 6\n",
      "00:41:58 [INFO] train episode 585: winner = 1, steps = 4\n",
      "00:42:04 [INFO] train episode 586: winner = -1, steps = 7\n",
      "00:42:06 [INFO] train episode 587: winner = 1, steps = 6\n",
      "00:42:13 [INFO] train episode 588: winner = 1, steps = 6\n",
      "00:42:17 [INFO] train episode 589: winner = -1, steps = 7\n",
      "00:42:20 [INFO] train episode 590: winner = 1, steps = 6\n",
      "00:42:22 [INFO] train episode 591: winner = 1, steps = 6\n",
      "00:42:22 [INFO] train episode 592: winner = 1, steps = 6\n",
      "00:42:24 [INFO] train episode 593: winner = 1, steps = 4\n",
      "00:42:26 [INFO] train episode 594: winner = 1, steps = 4\n",
      "00:42:29 [INFO] train episode 595: winner = 1, steps = 6\n",
      "00:42:31 [INFO] train episode 596: winner = 1, steps = 6\n",
      "00:42:34 [INFO] train episode 597: winner = 0, steps = 8\n",
      "00:42:36 [INFO] train episode 598: winner = 1, steps = 6\n",
      "00:42:37 [INFO] train episode 599: winner = 1, steps = 6\n",
      "00:42:39 [INFO] train episode 600: winner = 1, steps = 4\n",
      "00:42:42 [INFO] train episode 601: winner = 0, steps = 8\n",
      "00:42:42 [INFO] train episode 602: winner = 1, steps = 6\n",
      "00:42:44 [INFO] train episode 603: winner = 1, steps = 6\n",
      "00:42:46 [INFO] train episode 604: winner = 0, steps = 8\n",
      "00:42:46 [INFO] train episode 605: winner = 1, steps = 6\n",
      "00:42:49 [INFO] train episode 606: winner = 1, steps = 6\n",
      "00:42:49 [INFO] train episode 607: winner = 1, steps = 4\n",
      "00:42:49 [INFO] train episode 608: winner = 1, steps = 4\n",
      "00:42:49 [INFO] train episode 609: winner = 1, steps = 6\n",
      "00:42:51 [INFO] train episode 610: winner = 1, steps = 6\n",
      "00:42:56 [INFO] train episode 611: winner = 1, steps = 8\n",
      "00:43:00 [INFO] train episode 612: winner = 0, steps = 8\n",
      "00:43:01 [INFO] train episode 613: winner = 1, steps = 4\n",
      "00:43:03 [INFO] train episode 614: winner = 1, steps = 6\n",
      "00:43:03 [INFO] train episode 615: winner = 1, steps = 4\n",
      "00:43:05 [INFO] train episode 616: winner = 1, steps = 4\n",
      "00:43:07 [INFO] train episode 617: winner = 1, steps = 6\n",
      "00:43:08 [INFO] train episode 618: winner = -1, steps = 5\n",
      "00:43:10 [INFO] train episode 619: winner = 1, steps = 6\n",
      "00:43:10 [INFO] train episode 620: winner = 1, steps = 6\n",
      "00:43:12 [INFO] train episode 621: winner = 1, steps = 8\n",
      "00:43:12 [INFO] train episode 622: winner = 1, steps = 6\n",
      "00:43:15 [INFO] train episode 623: winner = 0, steps = 8\n",
      "00:43:16 [INFO] train episode 624: winner = 1, steps = 4\n",
      "00:43:17 [INFO] train episode 625: winner = -1, steps = 5\n",
      "00:43:18 [INFO] train episode 626: winner = 1, steps = 4\n",
      "00:43:19 [INFO] train episode 627: winner = -1, steps = 5\n",
      "00:43:25 [INFO] train episode 628: winner = 0, steps = 8\n",
      "00:43:25 [INFO] train episode 629: winner = 0, steps = 8\n",
      "00:43:27 [INFO] train episode 630: winner = -1, steps = 5\n",
      "00:43:29 [INFO] train episode 631: winner = 1, steps = 4\n",
      "00:43:30 [INFO] train episode 632: winner = 1, steps = 6\n",
      "00:43:31 [INFO] train episode 633: winner = 1, steps = 6\n",
      "00:43:33 [INFO] train episode 634: winner = 1, steps = 6\n",
      "00:43:34 [INFO] train episode 635: winner = 1, steps = 6\n",
      "00:43:36 [INFO] train episode 636: winner = -1, steps = 7\n",
      "00:43:36 [INFO] train episode 637: winner = 1, steps = 6\n",
      "00:43:37 [INFO] train episode 638: winner = 1, steps = 4\n",
      "00:43:37 [INFO] train episode 639: winner = 1, steps = 4\n",
      "00:43:39 [INFO] train episode 640: winner = -1, steps = 7\n",
      "00:43:41 [INFO] train episode 641: winner = 1, steps = 6\n",
      "00:43:43 [INFO] train episode 642: winner = -1, steps = 5\n",
      "00:43:46 [INFO] train episode 643: winner = 1, steps = 4\n",
      "00:43:46 [INFO] train episode 644: winner = 1, steps = 4\n",
      "00:43:48 [INFO] train episode 645: winner = 0, steps = 8\n",
      "00:43:51 [INFO] train episode 646: winner = 0, steps = 8\n",
      "00:43:52 [INFO] train episode 647: winner = 0, steps = 8\n",
      "00:43:52 [INFO] train episode 648: winner = 0, steps = 8\n",
      "00:43:55 [INFO] train episode 649: winner = 0, steps = 8\n",
      "00:43:58 [INFO] train episode 650: winner = 0, steps = 8\n",
      "00:43:59 [INFO] train episode 651: winner = 1, steps = 6\n",
      "00:44:01 [INFO] train episode 652: winner = -1, steps = 5\n",
      "00:44:01 [INFO] train episode 653: winner = 1, steps = 4\n",
      "00:44:02 [INFO] train episode 654: winner = 1, steps = 6\n",
      "00:44:03 [INFO] train episode 655: winner = 1, steps = 6\n",
      "00:44:06 [INFO] train episode 656: winner = -1, steps = 5\n",
      "00:44:11 [INFO] train episode 657: winner = 0, steps = 8\n",
      "00:44:11 [INFO] train episode 658: winner = 1, steps = 6\n",
      "00:44:11 [INFO] train episode 659: winner = 1, steps = 4\n",
      "00:44:11 [INFO] train episode 660: winner = 0, steps = 8\n",
      "00:44:11 [INFO] train episode 661: winner = 0, steps = 8\n",
      "00:44:11 [INFO] train episode 662: winner = 1, steps = 4\n",
      "00:44:14 [INFO] train episode 663: winner = 0, steps = 8\n",
      "00:44:15 [INFO] train episode 664: winner = 1, steps = 6\n",
      "00:44:16 [INFO] train episode 665: winner = -1, steps = 5\n",
      "00:44:16 [INFO] train episode 666: winner = 1, steps = 4\n",
      "00:44:18 [INFO] train episode 667: winner = 0, steps = 8\n",
      "00:44:18 [INFO] train episode 668: winner = 0, steps = 8\n",
      "00:44:19 [INFO] train episode 669: winner = 1, steps = 4\n",
      "00:44:20 [INFO] train episode 670: winner = 1, steps = 6\n",
      "00:44:21 [INFO] train episode 671: winner = 0, steps = 8\n",
      "00:44:24 [INFO] train episode 672: winner = 1, steps = 6\n",
      "00:44:26 [INFO] train episode 673: winner = 0, steps = 8\n",
      "00:44:26 [INFO] train episode 674: winner = 1, steps = 4\n",
      "00:44:28 [INFO] train episode 675: winner = 1, steps = 8\n",
      "00:44:28 [INFO] train episode 676: winner = 0, steps = 8\n",
      "00:44:29 [INFO] train episode 677: winner = 1, steps = 4\n",
      "00:44:30 [INFO] train episode 678: winner = 1, steps = 4\n",
      "00:44:33 [INFO] train episode 679: winner = 0, steps = 8\n",
      "00:44:33 [INFO] train episode 680: winner = -1, steps = 5\n",
      "00:44:36 [INFO] train episode 681: winner = 1, steps = 6\n",
      "00:44:38 [INFO] train episode 682: winner = 0, steps = 8\n",
      "00:44:39 [INFO] train episode 683: winner = -1, steps = 7\n",
      "00:44:39 [INFO] train episode 684: winner = 1, steps = 8\n",
      "00:44:40 [INFO] train episode 685: winner = 1, steps = 8\n",
      "00:44:41 [INFO] train episode 686: winner = -1, steps = 5\n",
      "00:44:42 [INFO] train episode 687: winner = 0, steps = 8\n",
      "00:44:43 [INFO] train episode 688: winner = -1, steps = 5\n",
      "00:44:44 [INFO] train episode 689: winner = 1, steps = 6\n",
      "00:44:45 [INFO] train episode 690: winner = 0, steps = 8\n",
      "00:44:46 [INFO] train episode 691: winner = 1, steps = 4\n",
      "00:44:47 [INFO] train episode 692: winner = 1, steps = 6\n",
      "00:44:47 [INFO] train episode 693: winner = 1, steps = 4\n",
      "00:44:47 [INFO] train episode 694: winner = 1, steps = 4\n",
      "00:44:50 [INFO] train episode 695: winner = 0, steps = 8\n",
      "00:44:51 [INFO] train episode 696: winner = -1, steps = 5\n",
      "00:44:52 [INFO] train episode 697: winner = 1, steps = 4\n",
      "00:44:52 [INFO] train episode 698: winner = -1, steps = 5\n",
      "00:44:56 [INFO] train episode 699: winner = 0, steps = 8\n",
      "00:44:58 [INFO] train episode 700: winner = 0, steps = 8\n",
      "00:44:58 [INFO] test episode 700:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:45:07 [INFO] step 0：player 1, action (0, 2)\n",
      "++o\n",
      "+++\n",
      "+++\n",
      "00:45:15 [INFO] step 1：player -1, action (0, 1)\n",
      "+xo\n",
      "+++\n",
      "+++\n",
      "00:45:19 [INFO] step 2：player 1, action (2, 0)\n",
      "+xo\n",
      "+++\n",
      "o++\n",
      "00:45:22 [INFO] step 3：player -1, action (1, 1)\n",
      "+xo\n",
      "+x+\n",
      "o++\n",
      "00:45:23 [INFO] step 4：player 1, action (2, 1)\n",
      "+xo\n",
      "+x+\n",
      "oo+\n",
      "00:45:23 [INFO] step 5：player -1, action (2, 2)\n",
      "+xo\n",
      "+x+\n",
      "oox\n",
      "00:45:23 [INFO] step 6：player 1, action (1, 0)\n",
      "+xo\n",
      "ox+\n",
      "oox\n",
      "00:45:23 [INFO] step 7：player -1, action (0, 0)\n",
      "xxo\n",
      "ox+\n",
      "oox\n",
      "00:45:23 [INFO] test episode 700: winner = -1, steps = 7\n",
      "00:45:38 [INFO] train episode 701: winner = 1, steps = 4\n",
      "00:45:54 [INFO] train episode 702: winner = -1, steps = 7\n",
      "00:46:10 [INFO] train episode 703: winner = -1, steps = 5\n",
      "00:46:22 [INFO] train episode 704: winner = 1, steps = 4\n",
      "00:46:24 [INFO] train episode 705: winner = 1, steps = 4\n",
      "00:46:36 [INFO] train episode 706: winner = 1, steps = 4\n",
      "00:46:49 [INFO] train episode 707: winner = 1, steps = 4\n",
      "00:47:02 [INFO] train episode 708: winner = 1, steps = 8\n",
      "00:47:06 [INFO] train episode 709: winner = -1, steps = 7\n",
      "00:47:16 [INFO] train episode 710: winner = 0, steps = 8\n",
      "00:47:25 [INFO] train episode 711: winner = 0, steps = 8\n",
      "00:47:28 [INFO] train episode 712: winner = 1, steps = 4\n",
      "00:47:33 [INFO] train episode 713: winner = 1, steps = 4\n",
      "00:47:49 [INFO] train episode 714: winner = 1, steps = 6\n",
      "00:47:53 [INFO] train episode 715: winner = -1, steps = 7\n",
      "00:48:00 [INFO] train episode 716: winner = 1, steps = 6\n",
      "00:48:02 [INFO] train episode 717: winner = 1, steps = 4\n",
      "00:48:06 [INFO] train episode 718: winner = 1, steps = 6\n",
      "00:48:09 [INFO] train episode 719: winner = 1, steps = 6\n",
      "00:48:13 [INFO] train episode 720: winner = -1, steps = 5\n",
      "00:48:19 [INFO] train episode 721: winner = -1, steps = 7\n",
      "00:48:22 [INFO] train episode 722: winner = -1, steps = 5\n",
      "00:48:25 [INFO] train episode 723: winner = 1, steps = 6\n",
      "00:48:28 [INFO] train episode 724: winner = 1, steps = 6\n",
      "00:48:31 [INFO] train episode 725: winner = -1, steps = 7\n",
      "00:48:33 [INFO] train episode 726: winner = 1, steps = 6\n",
      "00:48:34 [INFO] train episode 727: winner = 1, steps = 4\n",
      "00:48:34 [INFO] train episode 728: winner = 0, steps = 8\n",
      "00:48:34 [INFO] train episode 729: winner = 1, steps = 4\n",
      "00:48:35 [INFO] train episode 730: winner = 1, steps = 4\n",
      "00:48:37 [INFO] train episode 731: winner = 1, steps = 4\n",
      "00:48:37 [INFO] train episode 732: winner = 1, steps = 4\n",
      "00:48:42 [INFO] train episode 733: winner = 1, steps = 4\n",
      "00:48:45 [INFO] train episode 734: winner = 0, steps = 8\n",
      "00:48:53 [INFO] train episode 735: winner = -1, steps = 5\n",
      "00:48:56 [INFO] train episode 736: winner = -1, steps = 5\n",
      "00:49:00 [INFO] train episode 737: winner = 1, steps = 6\n",
      "00:49:04 [INFO] train episode 738: winner = 0, steps = 8\n",
      "00:49:07 [INFO] train episode 739: winner = 1, steps = 8\n",
      "00:49:09 [INFO] train episode 740: winner = 1, steps = 6\n",
      "00:49:11 [INFO] train episode 741: winner = 1, steps = 4\n",
      "00:49:13 [INFO] train episode 742: winner = -1, steps = 7\n",
      "00:49:13 [INFO] train episode 743: winner = -1, steps = 7\n",
      "00:49:13 [INFO] train episode 744: winner = -1, steps = 5\n",
      "00:49:15 [INFO] train episode 745: winner = 0, steps = 8\n",
      "00:49:20 [INFO] train episode 746: winner = 0, steps = 8\n",
      "00:49:20 [INFO] train episode 747: winner = 1, steps = 6\n",
      "00:49:22 [INFO] train episode 748: winner = 1, steps = 4\n",
      "00:49:24 [INFO] train episode 749: winner = 0, steps = 8\n",
      "00:49:25 [INFO] train episode 750: winner = 1, steps = 6\n",
      "00:49:26 [INFO] train episode 751: winner = 1, steps = 8\n",
      "00:49:28 [INFO] train episode 752: winner = 1, steps = 6\n",
      "00:49:30 [INFO] train episode 753: winner = 1, steps = 4\n",
      "00:49:31 [INFO] train episode 754: winner = 1, steps = 6\n",
      "00:49:33 [INFO] train episode 755: winner = 1, steps = 6\n",
      "00:49:34 [INFO] train episode 756: winner = 1, steps = 4\n",
      "00:49:35 [INFO] train episode 757: winner = 1, steps = 6\n",
      "00:49:36 [INFO] train episode 758: winner = 1, steps = 4\n",
      "00:49:38 [INFO] train episode 759: winner = 0, steps = 8\n",
      "00:49:38 [INFO] train episode 760: winner = 1, steps = 6\n",
      "00:49:39 [INFO] train episode 761: winner = 1, steps = 4\n",
      "00:49:39 [INFO] train episode 762: winner = 1, steps = 6\n",
      "00:49:40 [INFO] train episode 763: winner = 1, steps = 4\n",
      "00:49:45 [INFO] train episode 764: winner = 0, steps = 8\n",
      "00:49:46 [INFO] train episode 765: winner = 1, steps = 4\n",
      "00:49:47 [INFO] train episode 766: winner = 1, steps = 4\n",
      "00:49:48 [INFO] train episode 767: winner = 1, steps = 4\n",
      "00:49:49 [INFO] train episode 768: winner = 1, steps = 4\n",
      "00:49:49 [INFO] train episode 769: winner = 1, steps = 6\n",
      "00:49:52 [INFO] train episode 770: winner = -1, steps = 7\n",
      "00:49:54 [INFO] train episode 771: winner = 1, steps = 6\n",
      "00:49:59 [INFO] train episode 772: winner = 0, steps = 8\n",
      "00:49:59 [INFO] train episode 773: winner = 1, steps = 4\n",
      "00:50:03 [INFO] train episode 774: winner = 0, steps = 8\n",
      "00:50:07 [INFO] train episode 775: winner = 0, steps = 8\n",
      "00:50:07 [INFO] train episode 776: winner = 1, steps = 4\n",
      "00:50:09 [INFO] train episode 777: winner = 1, steps = 6\n",
      "00:50:10 [INFO] train episode 778: winner = 0, steps = 8\n",
      "00:50:11 [INFO] train episode 779: winner = 1, steps = 6\n",
      "00:50:12 [INFO] train episode 780: winner = 1, steps = 4\n",
      "00:50:12 [INFO] train episode 781: winner = 1, steps = 6\n",
      "00:50:13 [INFO] train episode 782: winner = 1, steps = 6\n",
      "00:50:13 [INFO] train episode 783: winner = 1, steps = 6\n",
      "00:50:16 [INFO] train episode 784: winner = 0, steps = 8\n",
      "00:50:21 [INFO] train episode 785: winner = 0, steps = 8\n",
      "00:50:23 [INFO] train episode 786: winner = 0, steps = 8\n",
      "00:50:25 [INFO] train episode 787: winner = -1, steps = 5\n",
      "00:50:28 [INFO] train episode 788: winner = 0, steps = 8\n",
      "00:50:29 [INFO] train episode 789: winner = 1, steps = 6\n",
      "00:50:29 [INFO] train episode 790: winner = 1, steps = 4\n",
      "00:50:30 [INFO] train episode 791: winner = 1, steps = 6\n",
      "00:50:30 [INFO] train episode 792: winner = 1, steps = 4\n",
      "00:50:30 [INFO] train episode 793: winner = 1, steps = 4\n",
      "00:50:32 [INFO] train episode 794: winner = 1, steps = 6\n",
      "00:50:35 [INFO] train episode 795: winner = -1, steps = 7\n",
      "00:50:36 [INFO] train episode 796: winner = 1, steps = 4\n",
      "00:50:37 [INFO] train episode 797: winner = -1, steps = 7\n",
      "00:50:37 [INFO] train episode 798: winner = 1, steps = 4\n",
      "00:50:37 [INFO] train episode 799: winner = 1, steps = 6\n",
      "00:50:38 [INFO] train episode 800: winner = -1, steps = 5\n",
      "00:50:41 [INFO] train episode 801: winner = 0, steps = 8\n",
      "00:50:42 [INFO] train episode 802: winner = 0, steps = 8\n",
      "00:50:46 [INFO] train episode 803: winner = 0, steps = 8\n",
      "00:50:46 [INFO] train episode 804: winner = -1, steps = 7\n",
      "00:50:46 [INFO] train episode 805: winner = 0, steps = 8\n",
      "00:50:47 [INFO] train episode 806: winner = -1, steps = 5\n",
      "00:50:47 [INFO] train episode 807: winner = 1, steps = 4\n",
      "00:50:47 [INFO] train episode 808: winner = 0, steps = 8\n",
      "00:50:49 [INFO] train episode 809: winner = 1, steps = 6\n",
      "00:50:51 [INFO] train episode 810: winner = 0, steps = 8\n",
      "00:50:51 [INFO] train episode 811: winner = 0, steps = 8\n",
      "00:50:54 [INFO] train episode 812: winner = 1, steps = 6\n",
      "00:50:55 [INFO] train episode 813: winner = 1, steps = 4\n",
      "00:50:59 [INFO] train episode 814: winner = 0, steps = 8\n",
      "00:50:59 [INFO] train episode 815: winner = 0, steps = 8\n",
      "00:51:00 [INFO] train episode 816: winner = 1, steps = 6\n",
      "00:51:00 [INFO] train episode 817: winner = 1, steps = 6\n",
      "00:51:02 [INFO] train episode 818: winner = 0, steps = 8\n",
      "00:51:03 [INFO] train episode 819: winner = 1, steps = 4\n",
      "00:51:04 [INFO] train episode 820: winner = -1, steps = 5\n",
      "00:51:04 [INFO] train episode 821: winner = 1, steps = 4\n",
      "00:51:05 [INFO] train episode 822: winner = 1, steps = 4\n",
      "00:51:05 [INFO] train episode 823: winner = 1, steps = 4\n",
      "00:51:06 [INFO] train episode 824: winner = 1, steps = 6\n",
      "00:51:07 [INFO] train episode 825: winner = 0, steps = 8\n",
      "00:51:07 [INFO] train episode 826: winner = 1, steps = 4\n",
      "00:51:08 [INFO] train episode 827: winner = 0, steps = 8\n",
      "00:51:08 [INFO] train episode 828: winner = 1, steps = 6\n",
      "00:51:10 [INFO] train episode 829: winner = 1, steps = 6\n",
      "00:51:13 [INFO] train episode 830: winner = 0, steps = 8\n",
      "00:51:14 [INFO] train episode 831: winner = 1, steps = 6\n",
      "00:51:15 [INFO] train episode 832: winner = 1, steps = 8\n",
      "00:51:15 [INFO] train episode 833: winner = 1, steps = 4\n",
      "00:51:17 [INFO] train episode 834: winner = 1, steps = 4\n",
      "00:51:18 [INFO] train episode 835: winner = 0, steps = 8\n",
      "00:51:18 [INFO] train episode 836: winner = 1, steps = 4\n",
      "00:51:19 [INFO] train episode 837: winner = 1, steps = 4\n",
      "00:51:20 [INFO] train episode 838: winner = 1, steps = 8\n",
      "00:51:20 [INFO] train episode 839: winner = 1, steps = 4\n",
      "00:51:21 [INFO] train episode 840: winner = -1, steps = 7\n",
      "00:51:23 [INFO] train episode 841: winner = 1, steps = 6\n",
      "00:51:24 [INFO] train episode 842: winner = 1, steps = 4\n",
      "00:51:25 [INFO] train episode 843: winner = 1, steps = 6\n",
      "00:51:26 [INFO] train episode 844: winner = -1, steps = 5\n",
      "00:51:26 [INFO] train episode 845: winner = 1, steps = 6\n",
      "00:51:28 [INFO] train episode 846: winner = 1, steps = 6\n",
      "00:51:28 [INFO] test episode 846:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:51:37 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "00:51:45 [INFO] step 1：player -1, action (1, 0)\n",
      "+++\n",
      "x+o\n",
      "+++\n",
      "00:51:52 [INFO] step 2：player 1, action (2, 0)\n",
      "+++\n",
      "x+o\n",
      "o++\n",
      "00:51:56 [INFO] step 3：player -1, action (2, 2)\n",
      "+++\n",
      "x+o\n",
      "o+x\n",
      "00:51:57 [INFO] step 4：player 1, action (0, 0)\n",
      "o++\n",
      "x+o\n",
      "o+x\n",
      "00:51:58 [INFO] step 5：player -1, action (0, 2)\n",
      "o+x\n",
      "x+o\n",
      "o+x\n",
      "00:51:58 [INFO] step 6：player 1, action (1, 1)\n",
      "o+x\n",
      "xoo\n",
      "o+x\n",
      "00:51:59 [INFO] step 7：player -1, action (2, 1)\n",
      "o+x\n",
      "xoo\n",
      "oxx\n",
      "00:51:59 [INFO] step 8：player 1, action (0, 1)\n",
      "oox\n",
      "xoo\n",
      "oxx\n",
      "00:51:59 [INFO] test episode 846: winner = 0, steps = 8\n",
      "00:52:12 [INFO] train episode 847: winner = 1, steps = 4\n",
      "00:52:31 [INFO] train episode 848: winner = 0, steps = 8\n",
      "00:52:49 [INFO] train episode 849: winner = 0, steps = 8\n",
      "00:52:56 [INFO] train episode 850: winner = 1, steps = 8\n",
      "00:53:03 [INFO] train episode 851: winner = 1, steps = 6\n",
      "00:53:11 [INFO] train episode 852: winner = 0, steps = 8\n",
      "00:53:17 [INFO] train episode 853: winner = 0, steps = 8\n",
      "00:53:26 [INFO] train episode 854: winner = 1, steps = 6\n",
      "00:53:28 [INFO] train episode 855: winner = 1, steps = 6\n",
      "00:53:42 [INFO] train episode 856: winner = 1, steps = 8\n",
      "00:53:54 [INFO] train episode 857: winner = 0, steps = 8\n",
      "00:53:59 [INFO] train episode 858: winner = -1, steps = 5\n",
      "00:54:03 [INFO] train episode 859: winner = 1, steps = 8\n",
      "00:54:11 [INFO] train episode 860: winner = 0, steps = 8\n",
      "00:54:13 [INFO] train episode 861: winner = 0, steps = 8\n",
      "00:54:25 [INFO] train episode 862: winner = 1, steps = 8\n",
      "00:54:30 [INFO] train episode 863: winner = -1, steps = 7\n",
      "00:54:36 [INFO] train episode 864: winner = 1, steps = 6\n",
      "00:54:41 [INFO] train episode 865: winner = 1, steps = 4\n",
      "00:54:47 [INFO] train episode 866: winner = -1, steps = 7\n",
      "00:54:50 [INFO] train episode 867: winner = 1, steps = 6\n",
      "00:54:51 [INFO] train episode 868: winner = 1, steps = 6\n",
      "00:54:53 [INFO] train episode 869: winner = 1, steps = 6\n",
      "00:54:55 [INFO] train episode 870: winner = 1, steps = 6\n",
      "00:55:00 [INFO] train episode 871: winner = 0, steps = 8\n",
      "00:55:05 [INFO] train episode 872: winner = 1, steps = 4\n",
      "00:55:12 [INFO] train episode 873: winner = 0, steps = 8\n",
      "00:55:15 [INFO] train episode 874: winner = -1, steps = 5\n",
      "00:55:16 [INFO] train episode 875: winner = 1, steps = 4\n",
      "00:55:19 [INFO] train episode 876: winner = 0, steps = 8\n",
      "00:55:21 [INFO] train episode 877: winner = 0, steps = 8\n",
      "00:55:27 [INFO] train episode 878: winner = 0, steps = 8\n",
      "00:55:33 [INFO] train episode 879: winner = 0, steps = 8\n",
      "00:55:36 [INFO] train episode 880: winner = 1, steps = 6\n",
      "00:55:37 [INFO] train episode 881: winner = 1, steps = 4\n",
      "00:55:37 [INFO] train episode 882: winner = 1, steps = 6\n",
      "00:55:43 [INFO] train episode 883: winner = 0, steps = 8\n",
      "00:55:47 [INFO] train episode 884: winner = 1, steps = 6\n",
      "00:55:47 [INFO] train episode 885: winner = 1, steps = 4\n",
      "00:55:49 [INFO] train episode 886: winner = 1, steps = 6\n",
      "00:55:51 [INFO] train episode 887: winner = 1, steps = 6\n",
      "00:55:52 [INFO] train episode 888: winner = -1, steps = 7\n",
      "00:55:55 [INFO] train episode 889: winner = 1, steps = 6\n",
      "00:55:57 [INFO] train episode 890: winner = 1, steps = 8\n",
      "00:56:01 [INFO] train episode 891: winner = 0, steps = 8\n",
      "00:56:02 [INFO] train episode 892: winner = -1, steps = 7\n",
      "00:56:08 [INFO] train episode 893: winner = 0, steps = 8\n",
      "00:56:09 [INFO] train episode 894: winner = -1, steps = 5\n",
      "00:56:11 [INFO] train episode 895: winner = -1, steps = 7\n",
      "00:56:13 [INFO] train episode 896: winner = 1, steps = 6\n",
      "00:56:15 [INFO] train episode 897: winner = 1, steps = 6\n",
      "00:56:16 [INFO] train episode 898: winner = 1, steps = 4\n",
      "00:56:17 [INFO] train episode 899: winner = 1, steps = 8\n",
      "00:56:17 [INFO] train episode 900: winner = 0, steps = 8\n",
      "00:56:18 [INFO] train episode 901: winner = 1, steps = 4\n",
      "00:56:20 [INFO] train episode 902: winner = 1, steps = 6\n",
      "00:56:21 [INFO] train episode 903: winner = 1, steps = 6\n",
      "00:56:21 [INFO] train episode 904: winner = 0, steps = 8\n",
      "00:56:22 [INFO] train episode 905: winner = 1, steps = 6\n",
      "00:56:24 [INFO] train episode 906: winner = 1, steps = 6\n",
      "00:56:29 [INFO] train episode 907: winner = 1, steps = 8\n",
      "00:56:29 [INFO] train episode 908: winner = 0, steps = 8\n",
      "00:56:30 [INFO] train episode 909: winner = 1, steps = 4\n",
      "00:56:32 [INFO] train episode 910: winner = -1, steps = 7\n",
      "00:56:32 [INFO] train episode 911: winner = 1, steps = 6\n",
      "00:56:32 [INFO] train episode 912: winner = 0, steps = 8\n",
      "00:56:33 [INFO] train episode 913: winner = 0, steps = 8\n",
      "00:56:34 [INFO] train episode 914: winner = 1, steps = 6\n",
      "00:56:35 [INFO] train episode 915: winner = 1, steps = 4\n",
      "00:56:38 [INFO] train episode 916: winner = 1, steps = 8\n",
      "00:56:42 [INFO] train episode 917: winner = 0, steps = 8\n",
      "00:56:46 [INFO] train episode 918: winner = -1, steps = 5\n",
      "00:56:46 [INFO] train episode 919: winner = 1, steps = 4\n",
      "00:56:46 [INFO] train episode 920: winner = 1, steps = 6\n",
      "00:56:47 [INFO] train episode 921: winner = 1, steps = 6\n",
      "00:56:51 [INFO] train episode 922: winner = -1, steps = 7\n",
      "00:56:51 [INFO] train episode 923: winner = 1, steps = 4\n",
      "00:56:54 [INFO] train episode 924: winner = 1, steps = 4\n",
      "00:56:54 [INFO] train episode 925: winner = 1, steps = 6\n",
      "00:56:58 [INFO] train episode 926: winner = 0, steps = 8\n",
      "00:56:59 [INFO] train episode 927: winner = 1, steps = 6\n",
      "00:57:02 [INFO] train episode 928: winner = 0, steps = 8\n",
      "00:57:02 [INFO] train episode 929: winner = 1, steps = 4\n",
      "00:57:03 [INFO] train episode 930: winner = 1, steps = 8\n",
      "00:57:05 [INFO] train episode 931: winner = 1, steps = 6\n",
      "00:57:06 [INFO] train episode 932: winner = 1, steps = 6\n",
      "00:57:09 [INFO] train episode 933: winner = 1, steps = 8\n",
      "00:57:11 [INFO] train episode 934: winner = 0, steps = 8\n",
      "00:57:12 [INFO] train episode 935: winner = -1, steps = 7\n",
      "00:57:13 [INFO] train episode 936: winner = -1, steps = 5\n",
      "00:57:14 [INFO] train episode 937: winner = -1, steps = 7\n",
      "00:57:14 [INFO] train episode 938: winner = 1, steps = 4\n",
      "00:57:15 [INFO] train episode 939: winner = -1, steps = 5\n",
      "00:57:18 [INFO] train episode 940: winner = 0, steps = 8\n",
      "00:57:20 [INFO] train episode 941: winner = 0, steps = 8\n",
      "00:57:22 [INFO] train episode 942: winner = 0, steps = 8\n",
      "00:57:23 [INFO] train episode 943: winner = 1, steps = 4\n",
      "00:57:27 [INFO] train episode 944: winner = 0, steps = 8\n",
      "00:57:27 [INFO] train episode 945: winner = 0, steps = 8\n",
      "00:57:27 [INFO] train episode 946: winner = 1, steps = 6\n",
      "00:57:29 [INFO] train episode 947: winner = 0, steps = 8\n",
      "00:57:30 [INFO] train episode 948: winner = 1, steps = 6\n",
      "00:57:31 [INFO] train episode 949: winner = 0, steps = 8\n",
      "00:57:33 [INFO] train episode 950: winner = 0, steps = 8\n",
      "00:57:33 [INFO] train episode 951: winner = 0, steps = 8\n",
      "00:57:33 [INFO] train episode 952: winner = 1, steps = 6\n",
      "00:57:36 [INFO] train episode 953: winner = 1, steps = 8\n",
      "00:57:37 [INFO] train episode 954: winner = 0, steps = 8\n",
      "00:57:39 [INFO] train episode 955: winner = 1, steps = 6\n",
      "00:57:40 [INFO] train episode 956: winner = 0, steps = 8\n",
      "00:57:41 [INFO] train episode 957: winner = 0, steps = 8\n",
      "00:57:41 [INFO] train episode 958: winner = 1, steps = 6\n",
      "00:57:43 [INFO] train episode 959: winner = 1, steps = 8\n",
      "00:57:44 [INFO] train episode 960: winner = 0, steps = 8\n",
      "00:57:44 [INFO] train episode 961: winner = 1, steps = 6\n",
      "00:57:45 [INFO] train episode 962: winner = 1, steps = 4\n",
      "00:57:47 [INFO] train episode 963: winner = 0, steps = 8\n",
      "00:57:49 [INFO] train episode 964: winner = 1, steps = 6\n",
      "00:57:49 [INFO] train episode 965: winner = 1, steps = 6\n",
      "00:57:50 [INFO] train episode 966: winner = 1, steps = 6\n",
      "00:57:50 [INFO] train episode 967: winner = 1, steps = 6\n",
      "00:57:51 [INFO] train episode 968: winner = -1, steps = 5\n",
      "00:57:52 [INFO] train episode 969: winner = 1, steps = 6\n",
      "00:57:52 [INFO] train episode 970: winner = 1, steps = 4\n",
      "00:57:55 [INFO] train episode 971: winner = 0, steps = 8\n",
      "00:57:55 [INFO] train episode 972: winner = 1, steps = 6\n",
      "00:57:58 [INFO] train episode 973: winner = 0, steps = 8\n",
      "00:57:59 [INFO] train episode 974: winner = 0, steps = 8\n",
      "00:58:01 [INFO] train episode 975: winner = 1, steps = 6\n",
      "00:58:02 [INFO] train episode 976: winner = 1, steps = 6\n",
      "00:58:02 [INFO] train episode 977: winner = 0, steps = 8\n",
      "00:58:03 [INFO] train episode 978: winner = 1, steps = 4\n",
      "00:58:05 [INFO] train episode 979: winner = 1, steps = 8\n",
      "00:58:05 [INFO] test episode 979:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:58:14 [INFO] step 0：player 1, action (2, 0)\n",
      "+++\n",
      "+++\n",
      "o++\n",
      "00:58:21 [INFO] step 1：player -1, action (0, 0)\n",
      "x++\n",
      "+++\n",
      "o++\n",
      "00:58:26 [INFO] step 2：player 1, action (1, 1)\n",
      "x++\n",
      "+o+\n",
      "o++\n",
      "00:58:28 [INFO] step 3：player -1, action (0, 2)\n",
      "x+x\n",
      "+o+\n",
      "o++\n",
      "00:58:28 [INFO] step 4：player 1, action (0, 1)\n",
      "xox\n",
      "+o+\n",
      "o++\n",
      "00:58:28 [INFO] step 5：player -1, action (2, 1)\n",
      "xox\n",
      "+o+\n",
      "ox+\n",
      "00:58:28 [INFO] step 6：player 1, action (2, 2)\n",
      "xox\n",
      "+o+\n",
      "oxo\n",
      "00:58:28 [INFO] step 7：player -1, action (1, 2)\n",
      "xox\n",
      "+ox\n",
      "oxo\n",
      "00:58:29 [INFO] step 8：player 1, action (1, 0)\n",
      "xox\n",
      "oox\n",
      "oxo\n",
      "00:58:29 [INFO] test episode 979: winner = 0, steps = 8\n",
      "00:58:40 [INFO] train episode 980: winner = 1, steps = 4\n",
      "00:58:47 [INFO] train episode 981: winner = 0, steps = 8\n",
      "00:59:02 [INFO] train episode 982: winner = 1, steps = 6\n",
      "00:59:14 [INFO] train episode 983: winner = -1, steps = 7\n",
      "00:59:15 [INFO] train episode 984: winner = 1, steps = 4\n",
      "00:59:28 [INFO] train episode 985: winner = 1, steps = 4\n",
      "00:59:33 [INFO] train episode 986: winner = 0, steps = 8\n",
      "00:59:41 [INFO] train episode 987: winner = 1, steps = 6\n",
      "00:59:47 [INFO] train episode 988: winner = -1, steps = 5\n",
      "00:59:52 [INFO] train episode 989: winner = -1, steps = 5\n",
      "00:59:57 [INFO] train episode 990: winner = 1, steps = 6\n",
      "01:00:01 [INFO] train episode 991: winner = 1, steps = 6\n",
      "01:00:07 [INFO] train episode 992: winner = -1, steps = 7\n",
      "01:00:08 [INFO] train episode 993: winner = 1, steps = 4\n",
      "01:00:17 [INFO] train episode 994: winner = 1, steps = 6\n",
      "01:00:23 [INFO] train episode 995: winner = 1, steps = 6\n",
      "01:00:30 [INFO] train episode 996: winner = 0, steps = 8\n",
      "01:00:31 [INFO] train episode 997: winner = 1, steps = 4\n",
      "01:00:35 [INFO] train episode 998: winner = 0, steps = 8\n",
      "01:00:40 [INFO] train episode 999: winner = -1, steps = 7\n",
      "01:00:48 [INFO] train episode 1000: winner = 0, steps = 8\n",
      "01:01:02 [INFO] train episode 1001: winner = 1, steps = 6\n",
      "01:01:03 [INFO] train episode 1002: winner = 1, steps = 4\n",
      "01:01:03 [INFO] train episode 1003: winner = 1, steps = 4\n",
      "01:01:05 [INFO] train episode 1004: winner = 1, steps = 4\n",
      "01:01:12 [INFO] train episode 1005: winner = 0, steps = 8\n",
      "01:01:14 [INFO] train episode 1006: winner = 0, steps = 8\n",
      "01:01:14 [INFO] train episode 1007: winner = 1, steps = 6\n",
      "01:01:16 [INFO] train episode 1008: winner = -1, steps = 7\n",
      "01:01:19 [INFO] train episode 1009: winner = 1, steps = 6\n",
      "01:01:23 [INFO] train episode 1010: winner = 1, steps = 6\n",
      "01:01:23 [INFO] train episode 1011: winner = 1, steps = 4\n",
      "01:01:26 [INFO] train episode 1012: winner = 1, steps = 6\n",
      "01:01:28 [INFO] train episode 1013: winner = -1, steps = 7\n",
      "01:01:29 [INFO] train episode 1014: winner = -1, steps = 5\n",
      "01:01:32 [INFO] train episode 1015: winner = 1, steps = 6\n",
      "01:01:33 [INFO] train episode 1016: winner = 1, steps = 6\n",
      "01:01:35 [INFO] train episode 1017: winner = 1, steps = 4\n",
      "01:01:36 [INFO] train episode 1018: winner = -1, steps = 5\n",
      "01:01:38 [INFO] train episode 1019: winner = 0, steps = 8\n",
      "01:01:43 [INFO] train episode 1020: winner = 1, steps = 4\n",
      "01:01:46 [INFO] train episode 1021: winner = 1, steps = 6\n",
      "01:01:50 [INFO] train episode 1022: winner = 0, steps = 8\n",
      "01:02:01 [INFO] train episode 1023: winner = 0, steps = 8\n",
      "01:02:03 [INFO] train episode 1024: winner = 1, steps = 6\n",
      "01:02:04 [INFO] train episode 1025: winner = 1, steps = 6\n",
      "01:02:04 [INFO] train episode 1026: winner = 0, steps = 8\n",
      "01:02:10 [INFO] train episode 1027: winner = 0, steps = 8\n",
      "01:02:10 [INFO] train episode 1028: winner = 1, steps = 4\n",
      "01:02:12 [INFO] train episode 1029: winner = 1, steps = 6\n",
      "01:02:14 [INFO] train episode 1030: winner = 1, steps = 4\n",
      "01:02:16 [INFO] train episode 1031: winner = -1, steps = 5\n",
      "01:02:18 [INFO] train episode 1032: winner = 1, steps = 4\n",
      "01:02:21 [INFO] train episode 1033: winner = 0, steps = 8\n",
      "01:02:23 [INFO] train episode 1034: winner = 1, steps = 4\n",
      "01:02:27 [INFO] train episode 1035: winner = 0, steps = 8\n",
      "01:02:29 [INFO] train episode 1036: winner = -1, steps = 5\n",
      "01:02:29 [INFO] train episode 1037: winner = 1, steps = 4\n",
      "01:02:31 [INFO] train episode 1038: winner = 0, steps = 8\n",
      "01:02:31 [INFO] train episode 1039: winner = 1, steps = 6\n",
      "01:02:31 [INFO] train episode 1040: winner = 1, steps = 6\n",
      "01:02:33 [INFO] train episode 1041: winner = 0, steps = 8\n",
      "01:02:33 [INFO] train episode 1042: winner = 1, steps = 6\n",
      "01:02:35 [INFO] train episode 1043: winner = 1, steps = 6\n",
      "01:02:35 [INFO] train episode 1044: winner = 0, steps = 8\n",
      "01:02:41 [INFO] train episode 1045: winner = 0, steps = 8\n",
      "01:02:42 [INFO] train episode 1046: winner = 1, steps = 6\n",
      "01:02:44 [INFO] train episode 1047: winner = 1, steps = 6\n",
      "01:02:47 [INFO] train episode 1048: winner = 0, steps = 8\n",
      "01:02:47 [INFO] train episode 1049: winner = 1, steps = 4\n",
      "01:02:48 [INFO] train episode 1050: winner = 1, steps = 6\n",
      "01:02:49 [INFO] train episode 1051: winner = 1, steps = 4\n",
      "01:02:50 [INFO] train episode 1052: winner = 1, steps = 6\n",
      "01:02:50 [INFO] train episode 1053: winner = -1, steps = 5\n",
      "01:02:51 [INFO] train episode 1054: winner = 0, steps = 8\n",
      "01:02:51 [INFO] train episode 1055: winner = 1, steps = 4\n",
      "01:02:54 [INFO] train episode 1056: winner = 0, steps = 8\n",
      "01:02:55 [INFO] train episode 1057: winner = 1, steps = 6\n",
      "01:02:56 [INFO] train episode 1058: winner = 0, steps = 8\n",
      "01:03:01 [INFO] train episode 1059: winner = -1, steps = 7\n",
      "01:03:02 [INFO] train episode 1060: winner = 0, steps = 8\n",
      "01:03:02 [INFO] train episode 1061: winner = 1, steps = 4\n",
      "01:03:04 [INFO] train episode 1062: winner = 1, steps = 6\n",
      "01:03:05 [INFO] train episode 1063: winner = 1, steps = 6\n",
      "01:03:05 [INFO] train episode 1064: winner = 1, steps = 4\n",
      "01:03:08 [INFO] train episode 1065: winner = 1, steps = 8\n",
      "01:03:11 [INFO] train episode 1066: winner = 0, steps = 8\n",
      "01:03:13 [INFO] train episode 1067: winner = 0, steps = 8\n",
      "01:03:14 [INFO] train episode 1068: winner = 0, steps = 8\n",
      "01:03:17 [INFO] train episode 1069: winner = 1, steps = 6\n",
      "01:03:17 [INFO] train episode 1070: winner = 0, steps = 8\n",
      "01:03:18 [INFO] train episode 1071: winner = 1, steps = 6\n",
      "01:03:20 [INFO] train episode 1072: winner = 1, steps = 6\n",
      "01:03:21 [INFO] train episode 1073: winner = 0, steps = 8\n",
      "01:03:25 [INFO] train episode 1074: winner = 0, steps = 8\n",
      "01:03:26 [INFO] train episode 1075: winner = 1, steps = 6\n",
      "01:03:26 [INFO] train episode 1076: winner = 0, steps = 8\n",
      "01:03:28 [INFO] train episode 1077: winner = 1, steps = 6\n",
      "01:03:29 [INFO] train episode 1078: winner = 1, steps = 4\n",
      "01:03:32 [INFO] train episode 1079: winner = 0, steps = 8\n",
      "01:03:37 [INFO] train episode 1080: winner = 1, steps = 6\n",
      "01:03:37 [INFO] train episode 1081: winner = 0, steps = 8\n",
      "01:03:39 [INFO] train episode 1082: winner = 1, steps = 4\n",
      "01:03:40 [INFO] train episode 1083: winner = 0, steps = 8\n",
      "01:03:41 [INFO] train episode 1084: winner = 0, steps = 8\n",
      "01:03:41 [INFO] train episode 1085: winner = 1, steps = 4\n",
      "01:03:42 [INFO] train episode 1086: winner = 0, steps = 8\n",
      "01:03:43 [INFO] train episode 1087: winner = 1, steps = 6\n",
      "01:03:45 [INFO] train episode 1088: winner = 1, steps = 6\n",
      "01:03:45 [INFO] train episode 1089: winner = 0, steps = 8\n",
      "01:03:46 [INFO] train episode 1090: winner = 1, steps = 6\n",
      "01:03:46 [INFO] train episode 1091: winner = 1, steps = 8\n",
      "01:03:49 [INFO] train episode 1092: winner = 0, steps = 8\n",
      "01:03:50 [INFO] train episode 1093: winner = 0, steps = 8\n",
      "01:03:53 [INFO] train episode 1094: winner = 1, steps = 8\n",
      "01:03:53 [INFO] train episode 1095: winner = 0, steps = 8\n",
      "01:03:53 [INFO] train episode 1096: winner = 1, steps = 4\n",
      "01:03:57 [INFO] train episode 1097: winner = 1, steps = 6\n",
      "01:03:57 [INFO] train episode 1098: winner = -1, steps = 5\n",
      "01:03:59 [INFO] train episode 1099: winner = 0, steps = 8\n",
      "01:04:00 [INFO] train episode 1100: winner = 1, steps = 6\n",
      "01:04:01 [INFO] train episode 1101: winner = 1, steps = 6\n",
      "01:04:03 [INFO] train episode 1102: winner = 1, steps = 4\n",
      "01:04:03 [INFO] train episode 1103: winner = 1, steps = 6\n",
      "01:04:03 [INFO] train episode 1104: winner = 0, steps = 8\n",
      "01:04:03 [INFO] train episode 1105: winner = 1, steps = 4\n",
      "01:04:07 [INFO] train episode 1106: winner = 0, steps = 8\n",
      "01:04:09 [INFO] train episode 1107: winner = -1, steps = 5\n",
      "01:04:11 [INFO] train episode 1108: winner = -1, steps = 7\n",
      "01:04:13 [INFO] train episode 1109: winner = 0, steps = 8\n",
      "01:04:13 [INFO] train episode 1110: winner = -1, steps = 7\n",
      "01:04:15 [INFO] train episode 1111: winner = -1, steps = 7\n",
      "01:04:15 [INFO] train episode 1112: winner = 1, steps = 6\n",
      "01:04:15 [INFO] train episode 1113: winner = 1, steps = 4\n",
      "01:04:17 [INFO] train episode 1114: winner = 1, steps = 8\n",
      "01:04:19 [INFO] train episode 1115: winner = 0, steps = 8\n",
      "01:04:19 [INFO] train episode 1116: winner = 1, steps = 4\n",
      "01:04:21 [INFO] train episode 1117: winner = 1, steps = 6\n",
      "01:04:21 [INFO] test episode 1117:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:04:31 [INFO] step 0：player 1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+o+\n",
      "01:04:39 [INFO] step 1：player -1, action (1, 2)\n",
      "+++\n",
      "++x\n",
      "+o+\n",
      "01:04:44 [INFO] step 2：player 1, action (0, 1)\n",
      "+o+\n",
      "++x\n",
      "+o+\n",
      "01:04:46 [INFO] step 3：player -1, action (1, 0)\n",
      "+o+\n",
      "x+x\n",
      "+o+\n",
      "01:04:47 [INFO] step 4：player 1, action (1, 1)\n",
      "+o+\n",
      "xox\n",
      "+o+\n",
      "01:04:47 [INFO] test episode 1117: winner = 1, steps = 4\n",
      "01:04:58 [INFO] train episode 1118: winner = 1, steps = 4\n",
      "01:05:14 [INFO] train episode 1119: winner = -1, steps = 7\n",
      "01:05:32 [INFO] train episode 1120: winner = 0, steps = 8\n",
      "01:05:42 [INFO] train episode 1121: winner = 1, steps = 6\n",
      "01:05:59 [INFO] train episode 1122: winner = 1, steps = 8\n",
      "01:06:14 [INFO] train episode 1123: winner = 0, steps = 8\n",
      "01:06:19 [INFO] train episode 1124: winner = -1, steps = 5\n",
      "01:06:20 [INFO] train episode 1125: winner = -1, steps = 5\n",
      "01:06:29 [INFO] train episode 1126: winner = -1, steps = 5\n",
      "01:06:36 [INFO] train episode 1127: winner = 1, steps = 6\n",
      "01:06:39 [INFO] train episode 1128: winner = 0, steps = 8\n",
      "01:06:42 [INFO] train episode 1129: winner = 1, steps = 4\n",
      "01:06:52 [INFO] train episode 1130: winner = 1, steps = 6\n",
      "01:07:03 [INFO] train episode 1131: winner = 1, steps = 4\n",
      "01:07:08 [INFO] train episode 1132: winner = 1, steps = 4\n",
      "01:07:11 [INFO] train episode 1133: winner = 1, steps = 4\n",
      "01:07:13 [INFO] train episode 1134: winner = 0, steps = 8\n",
      "01:07:16 [INFO] train episode 1135: winner = 1, steps = 6\n",
      "01:07:19 [INFO] train episode 1136: winner = 1, steps = 4\n",
      "01:07:24 [INFO] train episode 1137: winner = -1, steps = 5\n",
      "01:07:26 [INFO] train episode 1138: winner = -1, steps = 5\n",
      "01:07:32 [INFO] train episode 1139: winner = 0, steps = 8\n",
      "01:07:33 [INFO] train episode 1140: winner = 1, steps = 4\n",
      "01:07:34 [INFO] train episode 1141: winner = 0, steps = 8\n",
      "01:07:38 [INFO] train episode 1142: winner = 1, steps = 6\n",
      "01:07:39 [INFO] train episode 1143: winner = 1, steps = 4\n",
      "01:07:41 [INFO] train episode 1144: winner = 0, steps = 8\n",
      "01:07:45 [INFO] train episode 1145: winner = 1, steps = 8\n",
      "01:07:46 [INFO] train episode 1146: winner = 1, steps = 4\n",
      "01:07:47 [INFO] train episode 1147: winner = 1, steps = 4\n",
      "01:07:49 [INFO] train episode 1148: winner = 1, steps = 6\n",
      "01:07:53 [INFO] train episode 1149: winner = -1, steps = 5\n",
      "01:07:56 [INFO] train episode 1150: winner = 1, steps = 6\n",
      "01:07:58 [INFO] train episode 1151: winner = -1, steps = 5\n",
      "01:08:02 [INFO] train episode 1152: winner = 1, steps = 6\n",
      "01:08:04 [INFO] train episode 1153: winner = 1, steps = 4\n",
      "01:08:04 [INFO] train episode 1154: winner = 1, steps = 6\n",
      "01:08:05 [INFO] train episode 1155: winner = 1, steps = 6\n",
      "01:08:07 [INFO] train episode 1156: winner = 1, steps = 6\n",
      "01:08:07 [INFO] train episode 1157: winner = 1, steps = 6\n",
      "01:08:08 [INFO] train episode 1158: winner = 1, steps = 6\n",
      "01:08:13 [INFO] train episode 1159: winner = 1, steps = 4\n",
      "01:08:13 [INFO] train episode 1160: winner = 1, steps = 4\n",
      "01:08:13 [INFO] train episode 1161: winner = 1, steps = 4\n",
      "01:08:18 [INFO] train episode 1162: winner = 1, steps = 4\n",
      "01:08:19 [INFO] train episode 1163: winner = 1, steps = 6\n",
      "01:08:21 [INFO] train episode 1164: winner = 1, steps = 6\n",
      "01:08:24 [INFO] train episode 1165: winner = 0, steps = 8\n",
      "01:08:24 [INFO] train episode 1166: winner = 1, steps = 4\n",
      "01:08:27 [INFO] train episode 1167: winner = 1, steps = 8\n",
      "01:08:29 [INFO] train episode 1168: winner = -1, steps = 5\n",
      "01:08:30 [INFO] train episode 1169: winner = 1, steps = 6\n",
      "01:08:32 [INFO] train episode 1170: winner = 1, steps = 4\n",
      "01:08:33 [INFO] train episode 1171: winner = 1, steps = 6\n",
      "01:08:36 [INFO] train episode 1172: winner = -1, steps = 5\n",
      "01:08:38 [INFO] train episode 1173: winner = 1, steps = 4\n",
      "01:08:39 [INFO] train episode 1174: winner = 1, steps = 6\n",
      "01:08:40 [INFO] train episode 1175: winner = 0, steps = 8\n",
      "01:08:41 [INFO] train episode 1176: winner = -1, steps = 5\n",
      "01:08:42 [INFO] train episode 1177: winner = -1, steps = 7\n",
      "01:08:44 [INFO] train episode 1178: winner = 1, steps = 6\n",
      "01:08:46 [INFO] train episode 1179: winner = 0, steps = 8\n",
      "01:08:51 [INFO] train episode 1180: winner = 0, steps = 8\n",
      "01:08:51 [INFO] train episode 1181: winner = 1, steps = 4\n",
      "01:08:53 [INFO] train episode 1182: winner = 1, steps = 6\n",
      "01:08:54 [INFO] train episode 1183: winner = 0, steps = 8\n",
      "01:08:54 [INFO] train episode 1184: winner = 1, steps = 6\n",
      "01:08:55 [INFO] train episode 1185: winner = 1, steps = 6\n",
      "01:08:55 [INFO] train episode 1186: winner = 1, steps = 6\n",
      "01:08:58 [INFO] train episode 1187: winner = 0, steps = 8\n",
      "01:09:01 [INFO] train episode 1188: winner = 1, steps = 6\n",
      "01:09:03 [INFO] train episode 1189: winner = 1, steps = 8\n",
      "01:09:08 [INFO] train episode 1190: winner = 1, steps = 6\n",
      "01:09:14 [INFO] train episode 1191: winner = 0, steps = 8\n",
      "01:09:14 [INFO] train episode 1192: winner = 1, steps = 6\n",
      "01:09:16 [INFO] train episode 1193: winner = 0, steps = 8\n",
      "01:09:20 [INFO] train episode 1194: winner = 1, steps = 8\n",
      "01:09:21 [INFO] train episode 1195: winner = 1, steps = 6\n",
      "01:09:21 [INFO] train episode 1196: winner = 0, steps = 8\n",
      "01:09:23 [INFO] train episode 1197: winner = 0, steps = 8\n",
      "01:09:27 [INFO] train episode 1198: winner = 0, steps = 8\n",
      "01:09:27 [INFO] train episode 1199: winner = 0, steps = 8\n",
      "01:09:31 [INFO] train episode 1200: winner = 0, steps = 8\n",
      "01:09:31 [INFO] train episode 1201: winner = 1, steps = 4\n",
      "01:09:35 [INFO] train episode 1202: winner = -1, steps = 5\n",
      "01:09:37 [INFO] train episode 1203: winner = 1, steps = 6\n",
      "01:09:38 [INFO] train episode 1204: winner = -1, steps = 7\n",
      "01:09:38 [INFO] train episode 1205: winner = 1, steps = 4\n",
      "01:09:41 [INFO] train episode 1206: winner = 1, steps = 6\n",
      "01:09:41 [INFO] train episode 1207: winner = -1, steps = 5\n",
      "01:09:42 [INFO] train episode 1208: winner = -1, steps = 5\n",
      "01:09:46 [INFO] train episode 1209: winner = 0, steps = 8\n",
      "01:09:47 [INFO] train episode 1210: winner = -1, steps = 7\n",
      "01:09:50 [INFO] train episode 1211: winner = 0, steps = 8\n",
      "01:09:54 [INFO] train episode 1212: winner = 1, steps = 6\n",
      "01:09:54 [INFO] train episode 1213: winner = 0, steps = 8\n",
      "01:09:56 [INFO] train episode 1214: winner = 1, steps = 6\n",
      "01:09:57 [INFO] train episode 1215: winner = 1, steps = 6\n",
      "01:09:58 [INFO] train episode 1216: winner = 1, steps = 4\n",
      "01:09:58 [INFO] train episode 1217: winner = -1, steps = 5\n",
      "01:09:58 [INFO] train episode 1218: winner = 1, steps = 4\n",
      "01:09:58 [INFO] train episode 1219: winner = 1, steps = 6\n",
      "01:10:01 [INFO] train episode 1220: winner = -1, steps = 5\n",
      "01:10:01 [INFO] train episode 1221: winner = 1, steps = 6\n",
      "01:10:02 [INFO] train episode 1222: winner = 1, steps = 4\n",
      "01:10:04 [INFO] train episode 1223: winner = 0, steps = 8\n",
      "01:10:04 [INFO] train episode 1224: winner = 1, steps = 6\n",
      "01:10:07 [INFO] train episode 1225: winner = -1, steps = 5\n",
      "01:10:08 [INFO] train episode 1226: winner = 1, steps = 6\n",
      "01:10:11 [INFO] train episode 1227: winner = 0, steps = 8\n",
      "01:10:13 [INFO] train episode 1228: winner = 0, steps = 8\n",
      "01:10:14 [INFO] train episode 1229: winner = 0, steps = 8\n",
      "01:10:15 [INFO] train episode 1230: winner = 1, steps = 6\n",
      "01:10:15 [INFO] train episode 1231: winner = -1, steps = 7\n",
      "01:10:16 [INFO] train episode 1232: winner = 1, steps = 6\n",
      "01:10:18 [INFO] train episode 1233: winner = 1, steps = 6\n",
      "01:10:19 [INFO] train episode 1234: winner = 1, steps = 6\n",
      "01:10:20 [INFO] train episode 1235: winner = 1, steps = 6\n",
      "01:10:20 [INFO] train episode 1236: winner = 1, steps = 6\n",
      "01:10:20 [INFO] train episode 1237: winner = 1, steps = 4\n",
      "01:10:24 [INFO] train episode 1238: winner = 0, steps = 8\n",
      "01:10:27 [INFO] train episode 1239: winner = 0, steps = 8\n",
      "01:10:27 [INFO] train episode 1240: winner = 1, steps = 4\n",
      "01:10:27 [INFO] train episode 1241: winner = 0, steps = 8\n",
      "01:10:28 [INFO] train episode 1242: winner = -1, steps = 7\n",
      "01:10:29 [INFO] train episode 1243: winner = 1, steps = 4\n",
      "01:10:30 [INFO] train episode 1244: winner = 0, steps = 8\n",
      "01:10:33 [INFO] train episode 1245: winner = 1, steps = 6\n",
      "01:10:34 [INFO] train episode 1246: winner = 1, steps = 6\n",
      "01:10:37 [INFO] train episode 1247: winner = 0, steps = 8\n",
      "01:10:38 [INFO] train episode 1248: winner = 1, steps = 4\n",
      "01:10:38 [INFO] train episode 1249: winner = 1, steps = 6\n",
      "01:10:41 [INFO] train episode 1250: winner = 0, steps = 8\n",
      "01:10:42 [INFO] train episode 1251: winner = 1, steps = 4\n",
      "01:10:45 [INFO] train episode 1252: winner = 1, steps = 8\n",
      "01:10:48 [INFO] train episode 1253: winner = 0, steps = 8\n",
      "01:10:48 [INFO] train episode 1254: winner = 1, steps = 6\n",
      "01:10:49 [INFO] train episode 1255: winner = -1, steps = 5\n",
      "01:10:50 [INFO] train episode 1256: winner = 1, steps = 4\n",
      "01:10:50 [INFO] train episode 1257: winner = 1, steps = 6\n",
      "01:10:50 [INFO] train episode 1258: winner = 0, steps = 8\n",
      "01:10:52 [INFO] train episode 1259: winner = 1, steps = 6\n",
      "01:10:52 [INFO] test episode 1259:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:11:01 [INFO] step 0：player 1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+o+\n",
      "01:11:09 [INFO] step 1：player -1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "+ox\n",
      "01:11:15 [INFO] step 2：player 1, action (2, 0)\n",
      "+++\n",
      "+++\n",
      "oox\n",
      "01:11:18 [INFO] step 3：player -1, action (0, 2)\n",
      "++x\n",
      "+++\n",
      "oox\n",
      "01:11:19 [INFO] step 4：player 1, action (1, 2)\n",
      "++x\n",
      "++o\n",
      "oox\n",
      "01:11:19 [INFO] step 5：player -1, action (0, 0)\n",
      "x+x\n",
      "++o\n",
      "oox\n",
      "01:11:19 [INFO] step 6：player 1, action (0, 1)\n",
      "xox\n",
      "++o\n",
      "oox\n",
      "01:11:19 [INFO] step 7：player -1, action (1, 1)\n",
      "xox\n",
      "+xo\n",
      "oox\n",
      "01:11:19 [INFO] test episode 1259: winner = -1, steps = 7\n",
      "01:11:33 [INFO] train episode 1260: winner = 1, steps = 4\n",
      "01:11:49 [INFO] train episode 1261: winner = -1, steps = 5\n",
      "01:12:01 [INFO] train episode 1262: winner = 1, steps = 6\n",
      "01:12:10 [INFO] train episode 1263: winner = 1, steps = 6\n",
      "01:12:19 [INFO] train episode 1264: winner = 1, steps = 4\n",
      "01:12:30 [INFO] train episode 1265: winner = 0, steps = 8\n",
      "01:12:33 [INFO] train episode 1266: winner = 1, steps = 4\n",
      "01:12:45 [INFO] train episode 1267: winner = 1, steps = 6\n",
      "01:12:47 [INFO] train episode 1268: winner = 1, steps = 4\n",
      "01:12:50 [INFO] train episode 1269: winner = 1, steps = 4\n",
      "01:12:56 [INFO] train episode 1270: winner = 0, steps = 8\n",
      "01:13:08 [INFO] train episode 1271: winner = 1, steps = 6\n",
      "01:13:11 [INFO] train episode 1272: winner = 1, steps = 6\n",
      "01:13:18 [INFO] train episode 1273: winner = 0, steps = 8\n",
      "01:13:23 [INFO] train episode 1274: winner = 0, steps = 8\n",
      "01:13:29 [INFO] train episode 1275: winner = -1, steps = 7\n",
      "01:13:30 [INFO] train episode 1276: winner = 1, steps = 4\n",
      "01:13:37 [INFO] train episode 1277: winner = 0, steps = 8\n",
      "01:13:40 [INFO] train episode 1278: winner = 1, steps = 6\n",
      "01:13:47 [INFO] train episode 1279: winner = 0, steps = 8\n",
      "01:13:50 [INFO] train episode 1280: winner = 0, steps = 8\n",
      "01:13:53 [INFO] train episode 1281: winner = 1, steps = 4\n",
      "01:13:58 [INFO] train episode 1282: winner = -1, steps = 7\n",
      "01:14:03 [INFO] train episode 1283: winner = 0, steps = 8\n",
      "01:14:04 [INFO] train episode 1284: winner = 1, steps = 6\n",
      "01:14:09 [INFO] train episode 1285: winner = 1, steps = 4\n",
      "01:14:16 [INFO] train episode 1286: winner = 0, steps = 8\n",
      "01:14:18 [INFO] train episode 1287: winner = 1, steps = 6\n",
      "01:14:20 [INFO] train episode 1288: winner = 1, steps = 4\n",
      "01:14:26 [INFO] train episode 1289: winner = 0, steps = 8\n",
      "01:14:27 [INFO] train episode 1290: winner = 1, steps = 4\n",
      "01:14:28 [INFO] train episode 1291: winner = 0, steps = 8\n",
      "01:14:29 [INFO] train episode 1292: winner = 1, steps = 6\n",
      "01:14:33 [INFO] train episode 1293: winner = 0, steps = 8\n",
      "01:14:34 [INFO] train episode 1294: winner = 1, steps = 6\n",
      "01:14:35 [INFO] train episode 1295: winner = 1, steps = 4\n",
      "01:14:37 [INFO] train episode 1296: winner = 1, steps = 4\n",
      "01:14:38 [INFO] train episode 1297: winner = -1, steps = 7\n",
      "01:14:40 [INFO] train episode 1298: winner = 1, steps = 4\n",
      "01:14:48 [INFO] train episode 1299: winner = 0, steps = 8\n",
      "01:14:52 [INFO] train episode 1300: winner = 0, steps = 8\n",
      "01:14:53 [INFO] train episode 1301: winner = 0, steps = 8\n",
      "01:14:53 [INFO] train episode 1302: winner = 1, steps = 6\n",
      "01:15:02 [INFO] train episode 1303: winner = -1, steps = 7\n",
      "01:15:06 [INFO] train episode 1304: winner = 1, steps = 6\n",
      "01:15:08 [INFO] train episode 1305: winner = 0, steps = 8\n",
      "01:15:09 [INFO] train episode 1306: winner = 1, steps = 4\n",
      "01:15:09 [INFO] train episode 1307: winner = -1, steps = 5\n",
      "01:15:13 [INFO] train episode 1308: winner = 0, steps = 8\n",
      "01:15:14 [INFO] train episode 1309: winner = 0, steps = 8\n",
      "01:15:18 [INFO] train episode 1310: winner = -1, steps = 7\n",
      "01:15:19 [INFO] train episode 1311: winner = 0, steps = 8\n",
      "01:15:21 [INFO] train episode 1312: winner = 0, steps = 8\n",
      "01:15:25 [INFO] train episode 1313: winner = 0, steps = 8\n",
      "01:15:27 [INFO] train episode 1314: winner = 1, steps = 6\n",
      "01:15:28 [INFO] train episode 1315: winner = 1, steps = 4\n",
      "01:15:28 [INFO] train episode 1316: winner = 1, steps = 4\n",
      "01:15:30 [INFO] train episode 1317: winner = 1, steps = 4\n",
      "01:15:32 [INFO] train episode 1318: winner = 1, steps = 4\n",
      "01:15:34 [INFO] train episode 1319: winner = 1, steps = 4\n",
      "01:15:36 [INFO] train episode 1320: winner = 1, steps = 6\n",
      "01:15:38 [INFO] train episode 1321: winner = 1, steps = 6\n",
      "01:15:39 [INFO] train episode 1322: winner = 1, steps = 4\n",
      "01:15:43 [INFO] train episode 1323: winner = 0, steps = 8\n",
      "01:15:44 [INFO] train episode 1324: winner = 1, steps = 6\n",
      "01:15:44 [INFO] train episode 1325: winner = 1, steps = 4\n",
      "01:15:49 [INFO] train episode 1326: winner = 0, steps = 8\n",
      "01:15:52 [INFO] train episode 1327: winner = 0, steps = 8\n",
      "01:15:54 [INFO] train episode 1328: winner = 1, steps = 4\n",
      "01:15:56 [INFO] train episode 1329: winner = 1, steps = 4\n",
      "01:15:56 [INFO] train episode 1330: winner = 1, steps = 4\n",
      "01:15:59 [INFO] train episode 1331: winner = 0, steps = 8\n",
      "01:16:01 [INFO] train episode 1332: winner = 1, steps = 6\n",
      "01:16:06 [INFO] train episode 1333: winner = 0, steps = 8\n",
      "01:16:07 [INFO] train episode 1334: winner = 1, steps = 6\n",
      "01:16:10 [INFO] train episode 1335: winner = 0, steps = 8\n",
      "01:16:12 [INFO] train episode 1336: winner = 1, steps = 6\n",
      "01:16:12 [INFO] train episode 1337: winner = 1, steps = 6\n",
      "01:16:14 [INFO] train episode 1338: winner = 0, steps = 8\n",
      "01:16:17 [INFO] train episode 1339: winner = 0, steps = 8\n",
      "01:16:17 [INFO] train episode 1340: winner = 0, steps = 8\n",
      "01:16:20 [INFO] train episode 1341: winner = 0, steps = 8\n",
      "01:16:22 [INFO] train episode 1342: winner = 0, steps = 8\n",
      "01:16:25 [INFO] train episode 1343: winner = 1, steps = 4\n",
      "01:16:26 [INFO] train episode 1344: winner = 1, steps = 6\n",
      "01:16:28 [INFO] train episode 1345: winner = 1, steps = 8\n",
      "01:16:28 [INFO] train episode 1346: winner = 1, steps = 4\n",
      "01:16:28 [INFO] train episode 1347: winner = 1, steps = 4\n",
      "01:16:29 [INFO] train episode 1348: winner = 1, steps = 6\n",
      "01:16:32 [INFO] train episode 1349: winner = 0, steps = 8\n",
      "01:16:34 [INFO] train episode 1350: winner = 1, steps = 4\n",
      "01:16:34 [INFO] train episode 1351: winner = 1, steps = 6\n",
      "01:16:36 [INFO] train episode 1352: winner = 0, steps = 8\n",
      "01:16:36 [INFO] train episode 1353: winner = 1, steps = 6\n",
      "01:16:40 [INFO] train episode 1354: winner = 0, steps = 8\n",
      "01:16:40 [INFO] train episode 1355: winner = 1, steps = 6\n",
      "01:16:41 [INFO] train episode 1356: winner = -1, steps = 5\n",
      "01:16:42 [INFO] train episode 1357: winner = 1, steps = 4\n",
      "01:16:43 [INFO] train episode 1358: winner = 1, steps = 6\n",
      "01:16:43 [INFO] train episode 1359: winner = 1, steps = 4\n",
      "01:16:46 [INFO] train episode 1360: winner = 1, steps = 6\n",
      "01:16:46 [INFO] train episode 1361: winner = 1, steps = 6\n",
      "01:16:47 [INFO] train episode 1362: winner = 1, steps = 6\n",
      "01:16:47 [INFO] train episode 1363: winner = 1, steps = 4\n",
      "01:16:48 [INFO] train episode 1364: winner = 0, steps = 8\n",
      "01:16:48 [INFO] train episode 1365: winner = 1, steps = 6\n",
      "01:16:50 [INFO] train episode 1366: winner = 0, steps = 8\n",
      "01:16:51 [INFO] train episode 1367: winner = 0, steps = 8\n",
      "01:16:51 [INFO] train episode 1368: winner = 0, steps = 8\n",
      "01:16:51 [INFO] train episode 1369: winner = 0, steps = 8\n",
      "01:16:52 [INFO] train episode 1370: winner = 1, steps = 4\n",
      "01:16:52 [INFO] train episode 1371: winner = 1, steps = 4\n",
      "01:16:52 [INFO] train episode 1372: winner = 1, steps = 8\n",
      "01:16:52 [INFO] train episode 1373: winner = 0, steps = 8\n",
      "01:16:54 [INFO] train episode 1374: winner = 1, steps = 4\n",
      "01:16:55 [INFO] train episode 1375: winner = -1, steps = 7\n",
      "01:16:58 [INFO] train episode 1376: winner = 1, steps = 8\n",
      "01:16:59 [INFO] train episode 1377: winner = 1, steps = 8\n",
      "01:17:01 [INFO] train episode 1378: winner = 0, steps = 8\n",
      "01:17:04 [INFO] train episode 1379: winner = 0, steps = 8\n",
      "01:17:04 [INFO] train episode 1380: winner = 1, steps = 4\n",
      "01:17:04 [INFO] train episode 1381: winner = 1, steps = 6\n",
      "01:17:04 [INFO] train episode 1382: winner = 0, steps = 8\n",
      "01:17:06 [INFO] train episode 1383: winner = 1, steps = 4\n",
      "01:17:06 [INFO] train episode 1384: winner = 1, steps = 4\n",
      "01:17:06 [INFO] train episode 1385: winner = 0, steps = 8\n",
      "01:17:09 [INFO] train episode 1386: winner = 0, steps = 8\n",
      "01:17:14 [INFO] train episode 1387: winner = 0, steps = 8\n",
      "01:17:16 [INFO] train episode 1388: winner = -1, steps = 5\n",
      "01:17:17 [INFO] train episode 1389: winner = 1, steps = 6\n",
      "01:17:19 [INFO] train episode 1390: winner = 0, steps = 8\n",
      "01:17:20 [INFO] train episode 1391: winner = 1, steps = 4\n",
      "01:17:20 [INFO] train episode 1392: winner = 0, steps = 8\n",
      "01:17:23 [INFO] train episode 1393: winner = 0, steps = 8\n",
      "01:17:23 [INFO] train episode 1394: winner = 1, steps = 4\n",
      "01:17:23 [INFO] train episode 1395: winner = 0, steps = 8\n",
      "01:17:24 [INFO] train episode 1396: winner = 1, steps = 6\n",
      "01:17:25 [INFO] train episode 1397: winner = 0, steps = 8\n",
      "01:17:25 [INFO] test episode 1397:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:17:34 [INFO] step 0：player 1, action (0, 1)\n",
      "+o+\n",
      "+++\n",
      "+++\n",
      "01:17:41 [INFO] step 1：player -1, action (2, 1)\n",
      "+o+\n",
      "+++\n",
      "+x+\n",
      "01:17:48 [INFO] step 2：player 1, action (2, 2)\n",
      "+o+\n",
      "+++\n",
      "+xo\n",
      "01:17:51 [INFO] step 3：player -1, action (1, 0)\n",
      "+o+\n",
      "x++\n",
      "+xo\n",
      "01:17:53 [INFO] step 4：player 1, action (0, 2)\n",
      "+oo\n",
      "x++\n",
      "+xo\n",
      "01:17:53 [INFO] step 5：player -1, action (1, 1)\n",
      "+oo\n",
      "xx+\n",
      "+xo\n",
      "01:17:53 [INFO] step 6：player 1, action (0, 0)\n",
      "ooo\n",
      "xx+\n",
      "+xo\n",
      "01:17:53 [INFO] test episode 1397: winner = 1, steps = 6\n",
      "01:18:05 [INFO] train episode 1398: winner = -1, steps = 7\n",
      "01:18:08 [INFO] train episode 1399: winner = 1, steps = 4\n",
      "01:18:22 [INFO] train episode 1400: winner = -1, steps = 7\n",
      "01:18:37 [INFO] train episode 1401: winner = -1, steps = 7\n",
      "01:18:51 [INFO] train episode 1402: winner = 0, steps = 8\n",
      "01:18:52 [INFO] train episode 1403: winner = 0, steps = 8\n",
      "01:18:55 [INFO] train episode 1404: winner = 1, steps = 4\n",
      "01:19:03 [INFO] train episode 1405: winner = 0, steps = 8\n",
      "01:19:17 [INFO] train episode 1406: winner = -1, steps = 5\n",
      "01:19:21 [INFO] train episode 1407: winner = 1, steps = 4\n",
      "01:19:28 [INFO] train episode 1408: winner = 0, steps = 8\n",
      "01:19:34 [INFO] train episode 1409: winner = 0, steps = 8\n",
      "01:19:36 [INFO] train episode 1410: winner = 1, steps = 6\n",
      "01:19:45 [INFO] train episode 1411: winner = 0, steps = 8\n",
      "01:19:51 [INFO] train episode 1412: winner = -1, steps = 7\n",
      "01:19:53 [INFO] train episode 1413: winner = 0, steps = 8\n",
      "01:19:59 [INFO] train episode 1414: winner = 1, steps = 6\n",
      "01:20:03 [INFO] train episode 1415: winner = 1, steps = 6\n",
      "01:20:04 [INFO] train episode 1416: winner = 1, steps = 4\n",
      "01:20:07 [INFO] train episode 1417: winner = 1, steps = 6\n",
      "01:20:14 [INFO] train episode 1418: winner = 1, steps = 6\n",
      "01:20:16 [INFO] train episode 1419: winner = -1, steps = 7\n",
      "01:20:17 [INFO] train episode 1420: winner = 1, steps = 4\n",
      "01:20:25 [INFO] train episode 1421: winner = 0, steps = 8\n",
      "01:20:31 [INFO] train episode 1422: winner = 0, steps = 8\n",
      "01:20:32 [INFO] train episode 1423: winner = 1, steps = 4\n",
      "01:20:32 [INFO] train episode 1424: winner = 0, steps = 8\n",
      "01:20:33 [INFO] train episode 1425: winner = 0, steps = 8\n",
      "01:20:33 [INFO] train episode 1426: winner = 1, steps = 4\n",
      "01:20:40 [INFO] train episode 1427: winner = -1, steps = 7\n",
      "01:20:41 [INFO] train episode 1428: winner = 1, steps = 6\n",
      "01:20:45 [INFO] train episode 1429: winner = -1, steps = 7\n",
      "01:20:50 [INFO] train episode 1430: winner = 0, steps = 8\n",
      "01:20:54 [INFO] train episode 1431: winner = 1, steps = 6\n",
      "01:20:57 [INFO] train episode 1432: winner = 0, steps = 8\n",
      "01:21:01 [INFO] train episode 1433: winner = 0, steps = 8\n",
      "01:21:01 [INFO] train episode 1434: winner = 1, steps = 4\n",
      "01:21:06 [INFO] train episode 1435: winner = 0, steps = 8\n",
      "01:21:08 [INFO] train episode 1436: winner = -1, steps = 5\n",
      "01:21:10 [INFO] train episode 1437: winner = 0, steps = 8\n",
      "01:21:13 [INFO] train episode 1438: winner = 1, steps = 6\n",
      "01:21:14 [INFO] train episode 1439: winner = 0, steps = 8\n",
      "01:21:17 [INFO] train episode 1440: winner = 0, steps = 8\n",
      "01:21:23 [INFO] train episode 1441: winner = 0, steps = 8\n",
      "01:21:25 [INFO] train episode 1442: winner = 0, steps = 8\n",
      "01:21:27 [INFO] train episode 1443: winner = 1, steps = 6\n",
      "01:21:31 [INFO] train episode 1444: winner = 1, steps = 4\n",
      "01:21:38 [INFO] train episode 1445: winner = 0, steps = 8\n",
      "01:21:39 [INFO] train episode 1446: winner = 1, steps = 4\n",
      "01:21:39 [INFO] train episode 1447: winner = 1, steps = 6\n",
      "01:21:40 [INFO] train episode 1448: winner = 1, steps = 6\n",
      "01:21:42 [INFO] train episode 1449: winner = 0, steps = 8\n",
      "01:21:43 [INFO] train episode 1450: winner = 0, steps = 8\n",
      "01:21:45 [INFO] train episode 1451: winner = 1, steps = 4\n",
      "01:21:50 [INFO] train episode 1452: winner = -1, steps = 7\n",
      "01:21:52 [INFO] train episode 1453: winner = 1, steps = 6\n",
      "01:21:52 [INFO] train episode 1454: winner = 1, steps = 4\n",
      "01:21:56 [INFO] train episode 1455: winner = 1, steps = 6\n",
      "01:21:57 [INFO] train episode 1456: winner = 1, steps = 6\n",
      "01:21:59 [INFO] train episode 1457: winner = 1, steps = 6\n",
      "01:22:01 [INFO] train episode 1458: winner = 0, steps = 8\n",
      "01:22:01 [INFO] train episode 1459: winner = 0, steps = 8\n",
      "01:22:02 [INFO] train episode 1460: winner = 1, steps = 6\n",
      "01:22:04 [INFO] train episode 1461: winner = 1, steps = 6\n",
      "01:22:04 [INFO] train episode 1462: winner = 1, steps = 4\n",
      "01:22:05 [INFO] train episode 1463: winner = -1, steps = 5\n",
      "01:22:07 [INFO] train episode 1464: winner = -1, steps = 5\n",
      "01:22:07 [INFO] train episode 1465: winner = 1, steps = 4\n",
      "01:22:13 [INFO] train episode 1466: winner = 0, steps = 8\n",
      "01:22:16 [INFO] train episode 1467: winner = 1, steps = 4\n",
      "01:22:16 [INFO] train episode 1468: winner = 0, steps = 8\n",
      "01:22:16 [INFO] train episode 1469: winner = 0, steps = 8\n",
      "01:22:20 [INFO] train episode 1470: winner = 1, steps = 4\n",
      "01:22:21 [INFO] train episode 1471: winner = 1, steps = 4\n",
      "01:22:21 [INFO] train episode 1472: winner = 1, steps = 4\n",
      "01:22:25 [INFO] train episode 1473: winner = -1, steps = 5\n",
      "01:22:29 [INFO] train episode 1474: winner = 0, steps = 8\n",
      "01:22:31 [INFO] train episode 1475: winner = -1, steps = 7\n",
      "01:22:33 [INFO] train episode 1476: winner = 1, steps = 4\n",
      "01:22:33 [INFO] train episode 1477: winner = 1, steps = 6\n",
      "01:22:34 [INFO] train episode 1478: winner = 1, steps = 6\n",
      "01:22:35 [INFO] train episode 1479: winner = 0, steps = 8\n",
      "01:22:36 [INFO] train episode 1480: winner = -1, steps = 7\n",
      "01:22:36 [INFO] train episode 1481: winner = -1, steps = 5\n",
      "01:22:37 [INFO] train episode 1482: winner = 0, steps = 8\n",
      "01:22:38 [INFO] train episode 1483: winner = 1, steps = 6\n",
      "01:22:39 [INFO] train episode 1484: winner = 1, steps = 6\n",
      "01:22:41 [INFO] train episode 1485: winner = 0, steps = 8\n",
      "01:22:43 [INFO] train episode 1486: winner = 1, steps = 4\n",
      "01:22:47 [INFO] train episode 1487: winner = 0, steps = 8\n",
      "01:22:47 [INFO] train episode 1488: winner = -1, steps = 5\n",
      "01:22:49 [INFO] train episode 1489: winner = 1, steps = 6\n",
      "01:22:50 [INFO] train episode 1490: winner = 0, steps = 8\n",
      "01:22:50 [INFO] train episode 1491: winner = -1, steps = 7\n",
      "01:22:50 [INFO] train episode 1492: winner = 0, steps = 8\n",
      "01:22:51 [INFO] train episode 1493: winner = -1, steps = 7\n",
      "01:22:54 [INFO] train episode 1494: winner = 1, steps = 6\n",
      "01:22:54 [INFO] train episode 1495: winner = 1, steps = 6\n",
      "01:22:54 [INFO] train episode 1496: winner = 1, steps = 6\n",
      "01:22:55 [INFO] train episode 1497: winner = 1, steps = 6\n",
      "01:22:55 [INFO] train episode 1498: winner = 1, steps = 6\n",
      "01:22:57 [INFO] train episode 1499: winner = 0, steps = 8\n",
      "01:22:58 [INFO] train episode 1500: winner = 1, steps = 6\n",
      "01:22:58 [INFO] train episode 1501: winner = 0, steps = 8\n",
      "01:22:58 [INFO] train episode 1502: winner = 1, steps = 4\n",
      "01:23:00 [INFO] train episode 1503: winner = 0, steps = 8\n",
      "01:23:03 [INFO] train episode 1504: winner = 0, steps = 8\n",
      "01:23:03 [INFO] train episode 1505: winner = 1, steps = 4\n",
      "01:23:03 [INFO] train episode 1506: winner = 1, steps = 4\n",
      "01:23:05 [INFO] train episode 1507: winner = 1, steps = 6\n",
      "01:23:06 [INFO] train episode 1508: winner = 1, steps = 4\n",
      "01:23:06 [INFO] train episode 1509: winner = 0, steps = 8\n",
      "01:23:12 [INFO] train episode 1510: winner = 1, steps = 6\n",
      "01:23:12 [INFO] train episode 1511: winner = 1, steps = 4\n",
      "01:23:12 [INFO] train episode 1512: winner = -1, steps = 5\n",
      "01:23:14 [INFO] train episode 1513: winner = 0, steps = 8\n",
      "01:23:15 [INFO] train episode 1514: winner = 0, steps = 8\n",
      "01:23:15 [INFO] train episode 1515: winner = 1, steps = 4\n",
      "01:23:18 [INFO] train episode 1516: winner = 1, steps = 8\n",
      "01:23:20 [INFO] train episode 1517: winner = 0, steps = 8\n",
      "01:23:22 [INFO] train episode 1518: winner = 0, steps = 8\n",
      "01:23:24 [INFO] train episode 1519: winner = 0, steps = 8\n",
      "01:23:25 [INFO] train episode 1520: winner = 0, steps = 8\n",
      "01:23:26 [INFO] train episode 1521: winner = 0, steps = 8\n",
      "01:23:27 [INFO] train episode 1522: winner = 1, steps = 6\n",
      "01:23:27 [INFO] train episode 1523: winner = 0, steps = 8\n",
      "01:23:27 [INFO] train episode 1524: winner = 1, steps = 4\n",
      "01:23:27 [INFO] train episode 1525: winner = 1, steps = 4\n",
      "01:23:27 [INFO] train episode 1526: winner = 0, steps = 8\n",
      "01:23:29 [INFO] train episode 1527: winner = -1, steps = 5\n",
      "01:23:29 [INFO] train episode 1528: winner = 0, steps = 8\n",
      "01:23:30 [INFO] train episode 1529: winner = 0, steps = 8\n",
      "01:23:31 [INFO] train episode 1530: winner = -1, steps = 7\n",
      "01:23:33 [INFO] train episode 1531: winner = 0, steps = 8\n",
      "01:23:33 [INFO] train episode 1532: winner = 1, steps = 4\n",
      "01:23:36 [INFO] train episode 1533: winner = 0, steps = 8\n",
      "01:23:36 [INFO] test episode 1533:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:23:45 [INFO] step 0：player 1, action (1, 0)\n",
      "+++\n",
      "o++\n",
      "+++\n",
      "01:23:52 [INFO] step 1：player -1, action (1, 2)\n",
      "+++\n",
      "o+x\n",
      "+++\n",
      "01:23:59 [INFO] step 2：player 1, action (0, 0)\n",
      "o++\n",
      "o+x\n",
      "+++\n",
      "01:24:03 [INFO] step 3：player -1, action (2, 0)\n",
      "o++\n",
      "o+x\n",
      "x++\n",
      "01:24:04 [INFO] step 4：player 1, action (2, 1)\n",
      "o++\n",
      "o+x\n",
      "xo+\n",
      "01:24:05 [INFO] step 5：player -1, action (0, 2)\n",
      "o+x\n",
      "o+x\n",
      "xo+\n",
      "01:24:05 [INFO] step 6：player 1, action (2, 2)\n",
      "o+x\n",
      "o+x\n",
      "xoo\n",
      "01:24:05 [INFO] step 7：player -1, action (1, 1)\n",
      "o+x\n",
      "oxx\n",
      "xoo\n",
      "01:24:05 [INFO] test episode 1533: winner = -1, steps = 7\n",
      "01:24:17 [INFO] train episode 1534: winner = 1, steps = 6\n",
      "01:24:30 [INFO] train episode 1535: winner = 1, steps = 4\n",
      "01:24:39 [INFO] train episode 1536: winner = 0, steps = 8\n",
      "01:24:53 [INFO] train episode 1537: winner = 0, steps = 8\n",
      "01:25:10 [INFO] train episode 1538: winner = -1, steps = 7\n",
      "01:25:21 [INFO] train episode 1539: winner = 1, steps = 8\n",
      "01:25:30 [INFO] train episode 1540: winner = 1, steps = 6\n",
      "01:25:37 [INFO] train episode 1541: winner = 1, steps = 4\n",
      "01:25:45 [INFO] train episode 1542: winner = 0, steps = 8\n",
      "01:25:57 [INFO] train episode 1543: winner = 0, steps = 8\n",
      "01:26:00 [INFO] train episode 1544: winner = 0, steps = 8\n",
      "01:26:05 [INFO] train episode 1545: winner = -1, steps = 5\n",
      "01:26:10 [INFO] train episode 1546: winner = 0, steps = 8\n",
      "01:26:23 [INFO] train episode 1547: winner = 1, steps = 6\n",
      "01:26:26 [INFO] train episode 1548: winner = 0, steps = 8\n",
      "01:26:29 [INFO] train episode 1549: winner = 1, steps = 8\n",
      "01:26:36 [INFO] train episode 1550: winner = 1, steps = 4\n",
      "01:26:39 [INFO] train episode 1551: winner = -1, steps = 7\n",
      "01:26:42 [INFO] train episode 1552: winner = 0, steps = 8\n",
      "01:26:44 [INFO] train episode 1553: winner = 0, steps = 8\n",
      "01:26:49 [INFO] train episode 1554: winner = 1, steps = 8\n",
      "01:26:53 [INFO] train episode 1555: winner = 0, steps = 8\n",
      "01:26:55 [INFO] train episode 1556: winner = 1, steps = 6\n",
      "01:26:58 [INFO] train episode 1557: winner = 1, steps = 8\n",
      "01:27:04 [INFO] train episode 1558: winner = 0, steps = 8\n",
      "01:27:08 [INFO] train episode 1559: winner = 1, steps = 8\n",
      "01:27:10 [INFO] train episode 1560: winner = 1, steps = 6\n",
      "01:27:14 [INFO] train episode 1561: winner = 0, steps = 8\n",
      "01:27:17 [INFO] train episode 1562: winner = 1, steps = 6\n",
      "01:27:20 [INFO] train episode 1563: winner = 0, steps = 8\n",
      "01:27:21 [INFO] train episode 1564: winner = 1, steps = 4\n",
      "01:27:23 [INFO] train episode 1565: winner = 1, steps = 6\n",
      "01:27:29 [INFO] train episode 1566: winner = 1, steps = 6\n",
      "01:27:34 [INFO] train episode 1567: winner = 0, steps = 8\n",
      "01:27:35 [INFO] train episode 1568: winner = 1, steps = 4\n",
      "01:27:39 [INFO] train episode 1569: winner = 1, steps = 6\n",
      "01:27:42 [INFO] train episode 1570: winner = 0, steps = 8\n",
      "01:27:42 [INFO] train episode 1571: winner = 1, steps = 6\n",
      "01:27:44 [INFO] train episode 1572: winner = 1, steps = 6\n",
      "01:27:48 [INFO] train episode 1573: winner = 0, steps = 8\n",
      "01:27:50 [INFO] train episode 1574: winner = 1, steps = 8\n",
      "01:27:50 [INFO] train episode 1575: winner = 1, steps = 4\n",
      "01:27:52 [INFO] train episode 1576: winner = 1, steps = 6\n",
      "01:27:56 [INFO] train episode 1577: winner = 0, steps = 8\n",
      "01:27:58 [INFO] train episode 1578: winner = 0, steps = 8\n",
      "01:27:59 [INFO] train episode 1579: winner = 1, steps = 6\n",
      "01:28:00 [INFO] train episode 1580: winner = 0, steps = 8\n",
      "01:28:06 [INFO] train episode 1581: winner = 0, steps = 8\n",
      "01:28:12 [INFO] train episode 1582: winner = 0, steps = 8\n",
      "01:28:12 [INFO] train episode 1583: winner = 1, steps = 6\n",
      "01:28:12 [INFO] train episode 1584: winner = 1, steps = 6\n",
      "01:28:14 [INFO] train episode 1585: winner = 0, steps = 8\n",
      "01:28:15 [INFO] train episode 1586: winner = 1, steps = 4\n",
      "01:28:16 [INFO] train episode 1587: winner = 0, steps = 8\n",
      "01:28:18 [INFO] train episode 1588: winner = 1, steps = 8\n",
      "01:28:20 [INFO] train episode 1589: winner = 0, steps = 8\n",
      "01:28:20 [INFO] train episode 1590: winner = 1, steps = 4\n",
      "01:28:21 [INFO] train episode 1591: winner = 0, steps = 8\n",
      "01:28:25 [INFO] train episode 1592: winner = 1, steps = 6\n",
      "01:28:25 [INFO] train episode 1593: winner = 1, steps = 4\n",
      "01:28:26 [INFO] train episode 1594: winner = 1, steps = 4\n",
      "01:28:26 [INFO] train episode 1595: winner = 0, steps = 8\n",
      "01:28:27 [INFO] train episode 1596: winner = 1, steps = 6\n",
      "01:28:27 [INFO] train episode 1597: winner = -1, steps = 5\n",
      "01:28:27 [INFO] train episode 1598: winner = 1, steps = 6\n",
      "01:28:28 [INFO] train episode 1599: winner = 1, steps = 4\n",
      "01:28:31 [INFO] train episode 1600: winner = 0, steps = 8\n",
      "01:28:33 [INFO] train episode 1601: winner = 1, steps = 6\n",
      "01:28:34 [INFO] train episode 1602: winner = 1, steps = 4\n",
      "01:28:35 [INFO] train episode 1603: winner = 1, steps = 6\n",
      "01:28:35 [INFO] train episode 1604: winner = 1, steps = 4\n",
      "01:28:38 [INFO] train episode 1605: winner = -1, steps = 7\n",
      "01:28:40 [INFO] train episode 1606: winner = 0, steps = 8\n",
      "01:28:43 [INFO] train episode 1607: winner = 0, steps = 8\n",
      "01:28:46 [INFO] train episode 1608: winner = 1, steps = 6\n",
      "01:28:46 [INFO] train episode 1609: winner = 1, steps = 4\n",
      "01:28:47 [INFO] train episode 1610: winner = 1, steps = 6\n",
      "01:28:51 [INFO] train episode 1611: winner = 0, steps = 8\n",
      "01:28:52 [INFO] train episode 1612: winner = 1, steps = 6\n",
      "01:28:52 [INFO] train episode 1613: winner = 0, steps = 8\n",
      "01:28:56 [INFO] train episode 1614: winner = 0, steps = 8\n",
      "01:28:57 [INFO] train episode 1615: winner = 0, steps = 8\n",
      "01:28:59 [INFO] train episode 1616: winner = 0, steps = 8\n",
      "01:28:59 [INFO] train episode 1617: winner = 0, steps = 8\n",
      "01:28:59 [INFO] train episode 1618: winner = 1, steps = 6\n",
      "01:29:00 [INFO] train episode 1619: winner = 1, steps = 6\n",
      "01:29:01 [INFO] train episode 1620: winner = 0, steps = 8\n",
      "01:29:01 [INFO] train episode 1621: winner = 1, steps = 6\n",
      "01:29:02 [INFO] train episode 1622: winner = 0, steps = 8\n",
      "01:29:06 [INFO] train episode 1623: winner = 0, steps = 8\n",
      "01:29:07 [INFO] train episode 1624: winner = 1, steps = 4\n",
      "01:29:07 [INFO] train episode 1625: winner = -1, steps = 7\n",
      "01:29:08 [INFO] train episode 1626: winner = 1, steps = 4\n",
      "01:29:09 [INFO] train episode 1627: winner = 1, steps = 4\n",
      "01:29:12 [INFO] train episode 1628: winner = 1, steps = 4\n",
      "01:29:13 [INFO] train episode 1629: winner = 0, steps = 8\n",
      "01:29:14 [INFO] train episode 1630: winner = 0, steps = 8\n",
      "01:29:18 [INFO] train episode 1631: winner = 0, steps = 8\n",
      "01:29:22 [INFO] train episode 1632: winner = 0, steps = 8\n",
      "01:29:22 [INFO] train episode 1633: winner = 1, steps = 4\n",
      "01:29:22 [INFO] train episode 1634: winner = 0, steps = 8\n",
      "01:29:23 [INFO] train episode 1635: winner = 1, steps = 4\n",
      "01:29:25 [INFO] train episode 1636: winner = 0, steps = 8\n",
      "01:29:25 [INFO] train episode 1637: winner = 1, steps = 4\n",
      "01:29:28 [INFO] train episode 1638: winner = 1, steps = 6\n",
      "01:29:28 [INFO] train episode 1639: winner = 0, steps = 8\n",
      "01:29:28 [INFO] train episode 1640: winner = 1, steps = 4\n",
      "01:29:28 [INFO] train episode 1641: winner = 1, steps = 4\n",
      "01:29:29 [INFO] train episode 1642: winner = 1, steps = 4\n",
      "01:29:29 [INFO] train episode 1643: winner = -1, steps = 5\n",
      "01:29:29 [INFO] train episode 1644: winner = 1, steps = 4\n",
      "01:29:29 [INFO] train episode 1645: winner = 0, steps = 8\n",
      "01:29:29 [INFO] train episode 1646: winner = 0, steps = 8\n",
      "01:29:30 [INFO] train episode 1647: winner = 1, steps = 4\n",
      "01:29:30 [INFO] train episode 1648: winner = 1, steps = 4\n",
      "01:29:30 [INFO] train episode 1649: winner = 0, steps = 8\n",
      "01:29:31 [INFO] train episode 1650: winner = 1, steps = 4\n",
      "01:29:32 [INFO] train episode 1651: winner = 1, steps = 6\n",
      "01:29:32 [INFO] train episode 1652: winner = 1, steps = 6\n",
      "01:29:32 [INFO] train episode 1653: winner = 1, steps = 4\n",
      "01:29:35 [INFO] train episode 1654: winner = -1, steps = 7\n",
      "01:29:35 [INFO] train episode 1655: winner = 1, steps = 4\n",
      "01:29:37 [INFO] train episode 1656: winner = -1, steps = 7\n",
      "01:29:40 [INFO] train episode 1657: winner = -1, steps = 7\n",
      "01:29:41 [INFO] train episode 1658: winner = 0, steps = 8\n",
      "01:29:42 [INFO] train episode 1659: winner = 1, steps = 4\n",
      "01:29:43 [INFO] train episode 1660: winner = 0, steps = 8\n",
      "01:29:44 [INFO] train episode 1661: winner = 1, steps = 6\n",
      "01:29:45 [INFO] train episode 1662: winner = 0, steps = 8\n",
      "01:29:45 [INFO] train episode 1663: winner = 1, steps = 6\n",
      "01:29:48 [INFO] train episode 1664: winner = 1, steps = 6\n",
      "01:29:50 [INFO] train episode 1665: winner = 1, steps = 6\n",
      "01:29:52 [INFO] train episode 1666: winner = 0, steps = 8\n",
      "01:29:53 [INFO] train episode 1667: winner = 1, steps = 6\n",
      "01:29:54 [INFO] train episode 1668: winner = 0, steps = 8\n",
      "01:29:54 [INFO] test episode 1668:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:30:03 [INFO] step 0：player 1, action (0, 2)\n",
      "++o\n",
      "+++\n",
      "+++\n",
      "01:30:11 [INFO] step 1：player -1, action (0, 1)\n",
      "+xo\n",
      "+++\n",
      "+++\n",
      "01:30:14 [INFO] step 2：player 1, action (2, 2)\n",
      "+xo\n",
      "+++\n",
      "++o\n",
      "01:30:15 [INFO] step 3：player -1, action (1, 1)\n",
      "+xo\n",
      "+x+\n",
      "++o\n",
      "01:30:15 [INFO] step 4：player 1, action (1, 2)\n",
      "+xo\n",
      "+xo\n",
      "++o\n",
      "01:30:15 [INFO] test episode 1668: winner = 1, steps = 4\n",
      "01:30:33 [INFO] train episode 1669: winner = 0, steps = 8\n",
      "01:30:47 [INFO] train episode 1670: winner = 1, steps = 6\n",
      "01:31:01 [INFO] train episode 1671: winner = 1, steps = 4\n",
      "01:31:15 [INFO] train episode 1672: winner = 1, steps = 6\n",
      "01:31:16 [INFO] train episode 1673: winner = 1, steps = 8\n",
      "01:31:29 [INFO] train episode 1674: winner = 0, steps = 8\n",
      "01:31:40 [INFO] train episode 1675: winner = 0, steps = 8\n",
      "01:31:42 [INFO] train episode 1676: winner = 1, steps = 4\n",
      "01:31:49 [INFO] train episode 1677: winner = 0, steps = 8\n",
      "01:32:02 [INFO] train episode 1678: winner = 1, steps = 8\n",
      "01:32:09 [INFO] train episode 1679: winner = 0, steps = 8\n",
      "01:32:15 [INFO] train episode 1680: winner = 0, steps = 8\n",
      "01:32:23 [INFO] train episode 1681: winner = -1, steps = 5\n",
      "01:32:26 [INFO] train episode 1682: winner = 1, steps = 6\n",
      "01:32:27 [INFO] train episode 1683: winner = 1, steps = 6\n",
      "01:32:42 [INFO] train episode 1684: winner = -1, steps = 7\n",
      "01:32:46 [INFO] train episode 1685: winner = 0, steps = 8\n",
      "01:32:55 [INFO] train episode 1686: winner = 1, steps = 6\n",
      "01:32:59 [INFO] train episode 1687: winner = 0, steps = 8\n",
      "01:33:03 [INFO] train episode 1688: winner = 1, steps = 6\n",
      "01:33:05 [INFO] train episode 1689: winner = 1, steps = 4\n",
      "01:33:06 [INFO] train episode 1690: winner = 1, steps = 8\n",
      "01:33:06 [INFO] train episode 1691: winner = 1, steps = 4\n",
      "01:33:10 [INFO] train episode 1692: winner = -1, steps = 5\n",
      "01:33:13 [INFO] train episode 1693: winner = 1, steps = 6\n",
      "01:33:14 [INFO] train episode 1694: winner = 1, steps = 4\n",
      "01:33:17 [INFO] train episode 1695: winner = 1, steps = 4\n",
      "01:33:22 [INFO] train episode 1696: winner = 0, steps = 8\n",
      "01:33:23 [INFO] train episode 1697: winner = 1, steps = 4\n",
      "01:33:26 [INFO] train episode 1698: winner = 0, steps = 8\n",
      "01:33:28 [INFO] train episode 1699: winner = 1, steps = 6\n",
      "01:33:31 [INFO] train episode 1700: winner = 0, steps = 8\n",
      "01:33:33 [INFO] train episode 1701: winner = 1, steps = 4\n",
      "01:33:34 [INFO] train episode 1702: winner = -1, steps = 5\n",
      "01:33:35 [INFO] train episode 1703: winner = 1, steps = 4\n",
      "01:33:37 [INFO] train episode 1704: winner = -1, steps = 7\n",
      "01:33:42 [INFO] train episode 1705: winner = -1, steps = 5\n",
      "01:33:42 [INFO] train episode 1706: winner = 1, steps = 6\n",
      "01:33:44 [INFO] train episode 1707: winner = 1, steps = 4\n",
      "01:33:45 [INFO] train episode 1708: winner = 0, steps = 8\n",
      "01:33:53 [INFO] train episode 1709: winner = 1, steps = 8\n",
      "01:33:55 [INFO] train episode 1710: winner = -1, steps = 5\n",
      "01:33:56 [INFO] train episode 1711: winner = -1, steps = 5\n",
      "01:34:01 [INFO] train episode 1712: winner = 0, steps = 8\n",
      "01:34:02 [INFO] train episode 1713: winner = 0, steps = 8\n",
      "01:34:04 [INFO] train episode 1714: winner = 1, steps = 6\n",
      "01:34:07 [INFO] train episode 1715: winner = 0, steps = 8\n",
      "01:34:09 [INFO] train episode 1716: winner = 0, steps = 8\n",
      "01:34:10 [INFO] train episode 1717: winner = 1, steps = 4\n",
      "01:34:13 [INFO] train episode 1718: winner = 1, steps = 6\n",
      "01:34:15 [INFO] train episode 1719: winner = 0, steps = 8\n",
      "01:34:17 [INFO] train episode 1720: winner = -1, steps = 7\n",
      "01:34:19 [INFO] train episode 1721: winner = 1, steps = 6\n",
      "01:34:19 [INFO] train episode 1722: winner = 1, steps = 6\n",
      "01:34:20 [INFO] train episode 1723: winner = 1, steps = 4\n",
      "01:34:22 [INFO] train episode 1724: winner = 1, steps = 4\n",
      "01:34:25 [INFO] train episode 1725: winner = 1, steps = 4\n",
      "01:34:29 [INFO] train episode 1726: winner = 0, steps = 8\n",
      "01:34:31 [INFO] train episode 1727: winner = 1, steps = 4\n",
      "01:34:33 [INFO] train episode 1728: winner = 0, steps = 8\n",
      "01:34:33 [INFO] train episode 1729: winner = -1, steps = 7\n",
      "01:34:34 [INFO] train episode 1730: winner = 0, steps = 8\n",
      "01:34:40 [INFO] train episode 1731: winner = 0, steps = 8\n",
      "01:34:42 [INFO] train episode 1732: winner = 1, steps = 4\n",
      "01:34:46 [INFO] train episode 1733: winner = 0, steps = 8\n",
      "01:34:47 [INFO] train episode 1734: winner = 1, steps = 6\n",
      "01:34:48 [INFO] train episode 1735: winner = -1, steps = 5\n",
      "01:34:49 [INFO] train episode 1736: winner = 1, steps = 6\n",
      "01:34:52 [INFO] train episode 1737: winner = 1, steps = 6\n",
      "01:34:53 [INFO] train episode 1738: winner = 1, steps = 4\n",
      "01:34:54 [INFO] train episode 1739: winner = 1, steps = 6\n",
      "01:34:56 [INFO] train episode 1740: winner = 1, steps = 4\n",
      "01:34:59 [INFO] train episode 1741: winner = 0, steps = 8\n",
      "01:34:59 [INFO] train episode 1742: winner = 1, steps = 6\n",
      "01:35:00 [INFO] train episode 1743: winner = 1, steps = 4\n",
      "01:35:03 [INFO] train episode 1744: winner = 0, steps = 8\n",
      "01:35:05 [INFO] train episode 1745: winner = 0, steps = 8\n",
      "01:35:05 [INFO] train episode 1746: winner = 0, steps = 8\n",
      "01:35:06 [INFO] train episode 1747: winner = -1, steps = 7\n",
      "01:35:07 [INFO] train episode 1748: winner = 1, steps = 4\n",
      "01:35:08 [INFO] train episode 1749: winner = 0, steps = 8\n",
      "01:35:11 [INFO] train episode 1750: winner = 0, steps = 8\n",
      "01:35:12 [INFO] train episode 1751: winner = 1, steps = 6\n",
      "01:35:15 [INFO] train episode 1752: winner = 0, steps = 8\n",
      "01:35:16 [INFO] train episode 1753: winner = 1, steps = 4\n",
      "01:35:19 [INFO] train episode 1754: winner = 0, steps = 8\n",
      "01:35:19 [INFO] train episode 1755: winner = 0, steps = 8\n",
      "01:35:19 [INFO] train episode 1756: winner = 1, steps = 4\n",
      "01:35:19 [INFO] train episode 1757: winner = 0, steps = 8\n",
      "01:35:21 [INFO] train episode 1758: winner = 0, steps = 8\n",
      "01:35:24 [INFO] train episode 1759: winner = -1, steps = 7\n",
      "01:35:27 [INFO] train episode 1760: winner = 1, steps = 6\n",
      "01:35:29 [INFO] train episode 1761: winner = 0, steps = 8\n",
      "01:35:30 [INFO] train episode 1762: winner = 1, steps = 4\n",
      "01:35:32 [INFO] train episode 1763: winner = 0, steps = 8\n",
      "01:35:34 [INFO] train episode 1764: winner = 1, steps = 6\n",
      "01:35:34 [INFO] train episode 1765: winner = 1, steps = 4\n",
      "01:35:34 [INFO] train episode 1766: winner = 1, steps = 6\n",
      "01:35:35 [INFO] train episode 1767: winner = 0, steps = 8\n",
      "01:35:36 [INFO] train episode 1768: winner = 0, steps = 8\n",
      "01:35:37 [INFO] train episode 1769: winner = 0, steps = 8\n",
      "01:35:40 [INFO] train episode 1770: winner = 1, steps = 8\n",
      "01:35:41 [INFO] train episode 1771: winner = 0, steps = 8\n",
      "01:35:44 [INFO] train episode 1772: winner = 1, steps = 8\n",
      "01:35:46 [INFO] train episode 1773: winner = 0, steps = 8\n",
      "01:35:46 [INFO] train episode 1774: winner = 1, steps = 6\n",
      "01:35:47 [INFO] train episode 1775: winner = 1, steps = 4\n",
      "01:35:49 [INFO] train episode 1776: winner = 1, steps = 4\n",
      "01:35:52 [INFO] train episode 1777: winner = 0, steps = 8\n",
      "01:35:55 [INFO] train episode 1778: winner = 0, steps = 8\n",
      "01:35:57 [INFO] train episode 1779: winner = 1, steps = 6\n",
      "01:35:57 [INFO] train episode 1780: winner = 1, steps = 4\n",
      "01:35:58 [INFO] train episode 1781: winner = 1, steps = 6\n",
      "01:35:58 [INFO] train episode 1782: winner = 1, steps = 4\n",
      "01:35:59 [INFO] train episode 1783: winner = 1, steps = 6\n",
      "01:36:00 [INFO] train episode 1784: winner = -1, steps = 5\n",
      "01:36:03 [INFO] train episode 1785: winner = 0, steps = 8\n",
      "01:36:04 [INFO] train episode 1786: winner = 1, steps = 6\n",
      "01:36:06 [INFO] train episode 1787: winner = -1, steps = 5\n",
      "01:36:06 [INFO] train episode 1788: winner = 1, steps = 4\n",
      "01:36:08 [INFO] train episode 1789: winner = -1, steps = 5\n",
      "01:36:09 [INFO] train episode 1790: winner = 1, steps = 6\n",
      "01:36:10 [INFO] train episode 1791: winner = 0, steps = 8\n",
      "01:36:10 [INFO] train episode 1792: winner = 1, steps = 4\n",
      "01:36:14 [INFO] train episode 1793: winner = 0, steps = 8\n",
      "01:36:15 [INFO] train episode 1794: winner = 1, steps = 6\n",
      "01:36:15 [INFO] train episode 1795: winner = 1, steps = 4\n",
      "01:36:15 [INFO] train episode 1796: winner = 1, steps = 6\n",
      "01:36:16 [INFO] train episode 1797: winner = 1, steps = 6\n",
      "01:36:16 [INFO] train episode 1798: winner = 1, steps = 4\n",
      "01:36:16 [INFO] train episode 1799: winner = -1, steps = 5\n",
      "01:36:16 [INFO] train episode 1800: winner = 0, steps = 8\n",
      "01:36:19 [INFO] train episode 1801: winner = -1, steps = 7\n",
      "01:36:19 [INFO] train episode 1802: winner = 1, steps = 6\n",
      "01:36:20 [INFO] train episode 1803: winner = 0, steps = 8\n",
      "01:36:20 [INFO] train episode 1804: winner = 0, steps = 8\n",
      "01:36:24 [INFO] train episode 1805: winner = 1, steps = 6\n",
      "01:36:26 [INFO] train episode 1806: winner = 1, steps = 6\n",
      "01:36:26 [INFO] test episode 1806:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:36:35 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "01:36:43 [INFO] step 1：player -1, action (2, 0)\n",
      "+++\n",
      "+o+\n",
      "x++\n",
      "01:36:49 [INFO] step 2：player 1, action (2, 2)\n",
      "+++\n",
      "+o+\n",
      "x+o\n",
      "01:36:50 [INFO] step 3：player -1, action (0, 0)\n",
      "x++\n",
      "+o+\n",
      "x+o\n",
      "01:36:51 [INFO] step 4：player 1, action (0, 1)\n",
      "xo+\n",
      "+o+\n",
      "x+o\n",
      "01:36:51 [INFO] step 5：player -1, action (1, 0)\n",
      "xo+\n",
      "xo+\n",
      "x+o\n",
      "01:36:51 [INFO] test episode 1806: winner = -1, steps = 5\n",
      "01:37:06 [INFO] train episode 1807: winner = 0, steps = 8\n",
      "01:37:17 [INFO] train episode 1808: winner = 0, steps = 8\n",
      "01:37:28 [INFO] train episode 1809: winner = 1, steps = 6\n",
      "01:37:36 [INFO] train episode 1810: winner = 1, steps = 6\n",
      "01:37:48 [INFO] train episode 1811: winner = 0, steps = 8\n",
      "01:37:57 [INFO] train episode 1812: winner = 0, steps = 8\n",
      "01:37:59 [INFO] train episode 1813: winner = -1, steps = 5\n",
      "01:38:12 [INFO] train episode 1814: winner = 0, steps = 8\n",
      "01:38:12 [INFO] train episode 1815: winner = 1, steps = 6\n",
      "01:38:17 [INFO] train episode 1816: winner = 1, steps = 6\n",
      "01:38:30 [INFO] train episode 1817: winner = 1, steps = 6\n",
      "01:38:33 [INFO] train episode 1818: winner = 0, steps = 8\n",
      "01:38:39 [INFO] train episode 1819: winner = 1, steps = 6\n",
      "01:38:47 [INFO] train episode 1820: winner = 0, steps = 8\n",
      "01:38:48 [INFO] train episode 1821: winner = 1, steps = 4\n",
      "01:38:50 [INFO] train episode 1822: winner = 1, steps = 4\n",
      "01:38:54 [INFO] train episode 1823: winner = 0, steps = 8\n",
      "01:38:58 [INFO] train episode 1824: winner = 1, steps = 6\n",
      "01:39:05 [INFO] train episode 1825: winner = -1, steps = 5\n",
      "01:39:09 [INFO] train episode 1826: winner = 1, steps = 4\n",
      "01:39:16 [INFO] train episode 1827: winner = 1, steps = 6\n",
      "01:39:18 [INFO] train episode 1828: winner = 0, steps = 8\n",
      "01:39:29 [INFO] train episode 1829: winner = 1, steps = 6\n",
      "01:39:32 [INFO] train episode 1830: winner = -1, steps = 7\n",
      "01:39:34 [INFO] train episode 1831: winner = 1, steps = 4\n",
      "01:39:35 [INFO] train episode 1832: winner = 1, steps = 4\n",
      "01:39:38 [INFO] train episode 1833: winner = 1, steps = 6\n",
      "01:39:41 [INFO] train episode 1834: winner = 0, steps = 8\n",
      "01:39:42 [INFO] train episode 1835: winner = 1, steps = 6\n",
      "01:39:43 [INFO] train episode 1836: winner = -1, steps = 5\n",
      "01:39:45 [INFO] train episode 1837: winner = -1, steps = 7\n",
      "01:39:50 [INFO] train episode 1838: winner = 0, steps = 8\n",
      "01:39:52 [INFO] train episode 1839: winner = 1, steps = 6\n",
      "01:39:54 [INFO] train episode 1840: winner = 1, steps = 6\n",
      "01:39:58 [INFO] train episode 1841: winner = 1, steps = 6\n",
      "01:40:01 [INFO] train episode 1842: winner = 1, steps = 6\n",
      "01:40:02 [INFO] train episode 1843: winner = 0, steps = 8\n",
      "01:40:04 [INFO] train episode 1844: winner = 0, steps = 8\n",
      "01:40:06 [INFO] train episode 1845: winner = 1, steps = 4\n",
      "01:40:08 [INFO] train episode 1846: winner = 1, steps = 4\n",
      "01:40:08 [INFO] train episode 1847: winner = 1, steps = 6\n",
      "01:40:09 [INFO] train episode 1848: winner = 1, steps = 4\n",
      "01:40:11 [INFO] train episode 1849: winner = -1, steps = 7\n",
      "01:40:14 [INFO] train episode 1850: winner = 1, steps = 6\n",
      "01:40:18 [INFO] train episode 1851: winner = -1, steps = 5\n",
      "01:40:19 [INFO] train episode 1852: winner = 1, steps = 4\n",
      "01:40:22 [INFO] train episode 1853: winner = 1, steps = 6\n",
      "01:40:22 [INFO] train episode 1854: winner = 1, steps = 4\n",
      "01:40:25 [INFO] train episode 1855: winner = 1, steps = 6\n",
      "01:40:29 [INFO] train episode 1856: winner = -1, steps = 5\n",
      "01:40:31 [INFO] train episode 1857: winner = 1, steps = 6\n",
      "01:40:33 [INFO] train episode 1858: winner = 0, steps = 8\n",
      "01:40:36 [INFO] train episode 1859: winner = 1, steps = 6\n",
      "01:40:36 [INFO] train episode 1860: winner = 1, steps = 4\n",
      "01:40:36 [INFO] train episode 1861: winner = 1, steps = 6\n",
      "01:40:38 [INFO] train episode 1862: winner = 0, steps = 8\n",
      "01:40:40 [INFO] train episode 1863: winner = 0, steps = 8\n",
      "01:40:43 [INFO] train episode 1864: winner = 1, steps = 4\n",
      "01:40:47 [INFO] train episode 1865: winner = 1, steps = 6\n",
      "01:40:48 [INFO] train episode 1866: winner = 1, steps = 6\n",
      "01:40:51 [INFO] train episode 1867: winner = 1, steps = 6\n",
      "01:40:54 [INFO] train episode 1868: winner = 1, steps = 6\n",
      "01:40:54 [INFO] train episode 1869: winner = 0, steps = 8\n",
      "01:40:58 [INFO] train episode 1870: winner = 0, steps = 8\n",
      "01:41:00 [INFO] train episode 1871: winner = -1, steps = 5\n",
      "01:41:05 [INFO] train episode 1872: winner = 0, steps = 8\n",
      "01:41:06 [INFO] train episode 1873: winner = 1, steps = 4\n",
      "01:41:06 [INFO] train episode 1874: winner = 0, steps = 8\n",
      "01:41:06 [INFO] train episode 1875: winner = 1, steps = 4\n",
      "01:41:08 [INFO] train episode 1876: winner = 0, steps = 8\n",
      "01:41:10 [INFO] train episode 1877: winner = 1, steps = 8\n",
      "01:41:10 [INFO] train episode 1878: winner = 0, steps = 8\n",
      "01:41:12 [INFO] train episode 1879: winner = 1, steps = 4\n",
      "01:41:14 [INFO] train episode 1880: winner = 0, steps = 8\n",
      "01:41:15 [INFO] train episode 1881: winner = 0, steps = 8\n",
      "01:41:16 [INFO] train episode 1882: winner = 1, steps = 4\n",
      "01:41:17 [INFO] train episode 1883: winner = -1, steps = 5\n",
      "01:41:17 [INFO] train episode 1884: winner = 1, steps = 6\n",
      "01:41:21 [INFO] train episode 1885: winner = 0, steps = 8\n",
      "01:41:22 [INFO] train episode 1886: winner = 1, steps = 4\n",
      "01:41:23 [INFO] train episode 1887: winner = 1, steps = 4\n",
      "01:41:25 [INFO] train episode 1888: winner = 1, steps = 6\n",
      "01:41:26 [INFO] train episode 1889: winner = -1, steps = 5\n",
      "01:41:27 [INFO] train episode 1890: winner = 1, steps = 6\n",
      "01:41:29 [INFO] train episode 1891: winner = 0, steps = 8\n",
      "01:41:31 [INFO] train episode 1892: winner = -1, steps = 7\n",
      "01:41:32 [INFO] train episode 1893: winner = 1, steps = 4\n",
      "01:41:35 [INFO] train episode 1894: winner = 1, steps = 6\n",
      "01:41:36 [INFO] train episode 1895: winner = 1, steps = 4\n",
      "01:41:38 [INFO] train episode 1896: winner = 0, steps = 8\n",
      "01:41:39 [INFO] train episode 1897: winner = 1, steps = 4\n",
      "01:41:39 [INFO] train episode 1898: winner = 1, steps = 4\n",
      "01:41:39 [INFO] train episode 1899: winner = 1, steps = 6\n",
      "01:41:41 [INFO] train episode 1900: winner = 0, steps = 8\n",
      "01:41:41 [INFO] train episode 1901: winner = 1, steps = 4\n",
      "01:41:41 [INFO] train episode 1902: winner = 0, steps = 8\n",
      "01:41:42 [INFO] train episode 1903: winner = 0, steps = 8\n",
      "01:41:42 [INFO] train episode 1904: winner = 1, steps = 4\n",
      "01:41:42 [INFO] train episode 1905: winner = 1, steps = 4\n",
      "01:41:42 [INFO] train episode 1906: winner = 1, steps = 6\n",
      "01:41:43 [INFO] train episode 1907: winner = 1, steps = 4\n",
      "01:41:43 [INFO] train episode 1908: winner = 1, steps = 6\n",
      "01:41:44 [INFO] train episode 1909: winner = 1, steps = 4\n",
      "01:41:45 [INFO] train episode 1910: winner = 0, steps = 8\n",
      "01:41:46 [INFO] train episode 1911: winner = 1, steps = 4\n",
      "01:41:48 [INFO] train episode 1912: winner = 1, steps = 6\n",
      "01:41:49 [INFO] train episode 1913: winner = 1, steps = 8\n",
      "01:41:49 [INFO] train episode 1914: winner = 1, steps = 4\n",
      "01:41:50 [INFO] train episode 1915: winner = 0, steps = 8\n",
      "01:41:52 [INFO] train episode 1916: winner = 0, steps = 8\n",
      "01:41:55 [INFO] train episode 1917: winner = 0, steps = 8\n",
      "01:41:55 [INFO] train episode 1918: winner = 0, steps = 8\n",
      "01:41:56 [INFO] train episode 1919: winner = 1, steps = 6\n",
      "01:41:57 [INFO] train episode 1920: winner = 0, steps = 8\n",
      "01:41:57 [INFO] train episode 1921: winner = 1, steps = 4\n",
      "01:42:04 [INFO] train episode 1922: winner = 0, steps = 8\n",
      "01:42:04 [INFO] train episode 1923: winner = -1, steps = 7\n",
      "01:42:05 [INFO] train episode 1924: winner = 1, steps = 4\n",
      "01:42:06 [INFO] train episode 1925: winner = 0, steps = 8\n",
      "01:42:06 [INFO] train episode 1926: winner = 1, steps = 6\n",
      "01:42:07 [INFO] train episode 1927: winner = 1, steps = 4\n",
      "01:42:08 [INFO] train episode 1928: winner = 1, steps = 6\n",
      "01:42:09 [INFO] train episode 1929: winner = 1, steps = 6\n",
      "01:42:10 [INFO] train episode 1930: winner = 0, steps = 8\n",
      "01:42:11 [INFO] train episode 1931: winner = 1, steps = 6\n",
      "01:42:12 [INFO] train episode 1932: winner = 1, steps = 6\n",
      "01:42:12 [INFO] train episode 1933: winner = 1, steps = 4\n",
      "01:42:13 [INFO] train episode 1934: winner = 0, steps = 8\n",
      "01:42:14 [INFO] train episode 1935: winner = 0, steps = 8\n",
      "01:42:14 [INFO] train episode 1936: winner = 0, steps = 8\n",
      "01:42:18 [INFO] train episode 1937: winner = 1, steps = 8\n",
      "01:42:22 [INFO] train episode 1938: winner = -1, steps = 7\n",
      "01:42:24 [INFO] train episode 1939: winner = 1, steps = 6\n",
      "01:42:24 [INFO] train episode 1940: winner = 1, steps = 4\n",
      "01:42:25 [INFO] train episode 1941: winner = 1, steps = 4\n",
      "01:42:27 [INFO] train episode 1942: winner = 1, steps = 6\n",
      "01:42:28 [INFO] train episode 1943: winner = -1, steps = 5\n",
      "01:42:28 [INFO] train episode 1944: winner = 1, steps = 6\n",
      "01:42:29 [INFO] train episode 1945: winner = 1, steps = 6\n",
      "01:42:33 [INFO] train episode 1946: winner = 0, steps = 8\n",
      "01:42:36 [INFO] train episode 1947: winner = 1, steps = 6\n",
      "01:42:36 [INFO] test episode 1947:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:42:44 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "01:42:52 [INFO] step 1：player -1, action (1, 0)\n",
      "+++\n",
      "x+o\n",
      "+++\n",
      "01:42:59 [INFO] step 2：player 1, action (0, 2)\n",
      "++o\n",
      "x+o\n",
      "+++\n",
      "01:43:01 [INFO] step 3：player -1, action (1, 1)\n",
      "++o\n",
      "xxo\n",
      "+++\n",
      "01:43:02 [INFO] step 4：player 1, action (2, 2)\n",
      "++o\n",
      "xxo\n",
      "++o\n",
      "01:43:02 [INFO] test episode 1947: winner = 1, steps = 4\n",
      "01:43:21 [INFO] train episode 1948: winner = 0, steps = 8\n",
      "01:43:25 [INFO] train episode 1949: winner = 1, steps = 6\n",
      "01:43:39 [INFO] train episode 1950: winner = 1, steps = 6\n",
      "01:43:45 [INFO] train episode 1951: winner = 1, steps = 6\n",
      "01:43:55 [INFO] train episode 1952: winner = 0, steps = 8\n",
      "01:43:56 [INFO] train episode 1953: winner = 0, steps = 8\n",
      "01:44:00 [INFO] train episode 1954: winner = 1, steps = 6\n",
      "01:44:00 [INFO] train episode 1955: winner = 1, steps = 4\n",
      "01:44:13 [INFO] train episode 1956: winner = 1, steps = 6\n",
      "01:44:14 [INFO] train episode 1957: winner = 1, steps = 4\n",
      "01:44:24 [INFO] train episode 1958: winner = 1, steps = 4\n",
      "01:44:30 [INFO] train episode 1959: winner = 1, steps = 6\n",
      "01:44:31 [INFO] train episode 1960: winner = 1, steps = 4\n",
      "01:44:34 [INFO] train episode 1961: winner = 0, steps = 8\n",
      "01:44:44 [INFO] train episode 1962: winner = 0, steps = 8\n",
      "01:44:50 [INFO] train episode 1963: winner = 1, steps = 4\n",
      "01:44:53 [INFO] train episode 1964: winner = 1, steps = 8\n",
      "01:45:10 [INFO] train episode 1965: winner = 0, steps = 8\n",
      "01:45:21 [INFO] train episode 1966: winner = 0, steps = 8\n",
      "01:45:23 [INFO] train episode 1967: winner = 1, steps = 4\n",
      "01:45:27 [INFO] train episode 1968: winner = 0, steps = 8\n",
      "01:45:30 [INFO] train episode 1969: winner = 1, steps = 6\n",
      "01:45:37 [INFO] train episode 1970: winner = 0, steps = 8\n",
      "01:45:39 [INFO] train episode 1971: winner = 1, steps = 4\n",
      "01:45:45 [INFO] train episode 1972: winner = -1, steps = 7\n",
      "01:45:47 [INFO] train episode 1973: winner = 0, steps = 8\n",
      "01:45:50 [INFO] train episode 1974: winner = 0, steps = 8\n",
      "01:45:58 [INFO] train episode 1975: winner = 1, steps = 6\n",
      "01:46:01 [INFO] train episode 1976: winner = 0, steps = 8\n",
      "01:46:02 [INFO] train episode 1977: winner = 1, steps = 6\n",
      "01:46:06 [INFO] train episode 1978: winner = 1, steps = 4\n",
      "01:46:08 [INFO] train episode 1979: winner = 1, steps = 4\n",
      "01:46:11 [INFO] train episode 1980: winner = 0, steps = 8\n",
      "01:46:17 [INFO] train episode 1981: winner = -1, steps = 7\n",
      "01:46:19 [INFO] train episode 1982: winner = 1, steps = 4\n",
      "01:46:21 [INFO] train episode 1983: winner = 1, steps = 6\n",
      "01:46:22 [INFO] train episode 1984: winner = 1, steps = 4\n",
      "01:46:24 [INFO] train episode 1985: winner = 0, steps = 8\n",
      "01:46:25 [INFO] train episode 1986: winner = 1, steps = 4\n",
      "01:46:28 [INFO] train episode 1987: winner = 1, steps = 6\n",
      "01:46:31 [INFO] train episode 1988: winner = 0, steps = 8\n",
      "01:46:34 [INFO] train episode 1989: winner = -1, steps = 5\n",
      "01:46:36 [INFO] train episode 1990: winner = 0, steps = 8\n",
      "01:46:39 [INFO] train episode 1991: winner = 1, steps = 6\n",
      "01:46:41 [INFO] train episode 1992: winner = 1, steps = 6\n",
      "01:46:42 [INFO] train episode 1993: winner = 1, steps = 6\n",
      "01:46:42 [INFO] train episode 1994: winner = -1, steps = 5\n",
      "01:46:45 [INFO] train episode 1995: winner = 0, steps = 8\n",
      "01:46:47 [INFO] train episode 1996: winner = 1, steps = 4\n",
      "01:46:49 [INFO] train episode 1997: winner = 1, steps = 4\n",
      "01:46:52 [INFO] train episode 1998: winner = 0, steps = 8\n",
      "01:46:56 [INFO] train episode 1999: winner = 1, steps = 4\n",
      "01:46:56 [INFO] train episode 2000: winner = 1, steps = 6\n",
      "01:46:58 [INFO] train episode 2001: winner = 0, steps = 8\n",
      "01:46:58 [INFO] train episode 2002: winner = 1, steps = 4\n",
      "01:47:00 [INFO] train episode 2003: winner = 1, steps = 4\n",
      "01:47:05 [INFO] train episode 2004: winner = 1, steps = 6\n",
      "01:47:06 [INFO] train episode 2005: winner = 1, steps = 4\n",
      "01:47:09 [INFO] train episode 2006: winner = 0, steps = 8\n",
      "01:47:11 [INFO] train episode 2007: winner = 1, steps = 4\n",
      "01:47:12 [INFO] train episode 2008: winner = 1, steps = 6\n",
      "01:47:13 [INFO] train episode 2009: winner = 0, steps = 8\n",
      "01:47:14 [INFO] train episode 2010: winner = 1, steps = 4\n",
      "01:47:14 [INFO] train episode 2011: winner = 1, steps = 4\n",
      "01:47:15 [INFO] train episode 2012: winner = 0, steps = 8\n",
      "01:47:18 [INFO] train episode 2013: winner = 1, steps = 4\n",
      "01:47:22 [INFO] train episode 2014: winner = 1, steps = 6\n",
      "01:47:22 [INFO] train episode 2015: winner = 1, steps = 4\n",
      "01:47:26 [INFO] train episode 2016: winner = 0, steps = 8\n",
      "01:47:27 [INFO] train episode 2017: winner = 1, steps = 6\n",
      "01:47:31 [INFO] train episode 2018: winner = 1, steps = 8\n",
      "01:47:33 [INFO] train episode 2019: winner = 0, steps = 8\n",
      "01:47:36 [INFO] train episode 2020: winner = 0, steps = 8\n",
      "01:47:39 [INFO] train episode 2021: winner = 1, steps = 6\n",
      "01:47:43 [INFO] train episode 2022: winner = 0, steps = 8\n",
      "01:47:45 [INFO] train episode 2023: winner = 1, steps = 8\n",
      "01:47:49 [INFO] train episode 2024: winner = 1, steps = 6\n",
      "01:47:50 [INFO] train episode 2025: winner = -1, steps = 5\n",
      "01:47:50 [INFO] train episode 2026: winner = 1, steps = 6\n",
      "01:47:51 [INFO] train episode 2027: winner = 1, steps = 6\n",
      "01:47:52 [INFO] train episode 2028: winner = -1, steps = 5\n",
      "01:47:54 [INFO] train episode 2029: winner = 1, steps = 4\n",
      "01:47:56 [INFO] train episode 2030: winner = 1, steps = 4\n",
      "01:47:57 [INFO] train episode 2031: winner = 1, steps = 6\n",
      "01:47:59 [INFO] train episode 2032: winner = 1, steps = 4\n",
      "01:48:00 [INFO] train episode 2033: winner = 1, steps = 6\n",
      "01:48:00 [INFO] train episode 2034: winner = 0, steps = 8\n",
      "01:48:05 [INFO] train episode 2035: winner = 0, steps = 8\n",
      "01:48:05 [INFO] train episode 2036: winner = 0, steps = 8\n",
      "01:48:06 [INFO] train episode 2037: winner = 1, steps = 6\n",
      "01:48:09 [INFO] train episode 2038: winner = 0, steps = 8\n",
      "01:48:09 [INFO] train episode 2039: winner = 1, steps = 6\n",
      "01:48:10 [INFO] train episode 2040: winner = 0, steps = 8\n",
      "01:48:10 [INFO] train episode 2041: winner = -1, steps = 7\n",
      "01:48:12 [INFO] train episode 2042: winner = 0, steps = 8\n",
      "01:48:14 [INFO] train episode 2043: winner = 0, steps = 8\n",
      "01:48:15 [INFO] train episode 2044: winner = -1, steps = 5\n",
      "01:48:15 [INFO] train episode 2045: winner = 0, steps = 8\n",
      "01:48:16 [INFO] train episode 2046: winner = 1, steps = 6\n",
      "01:48:16 [INFO] train episode 2047: winner = 0, steps = 8\n",
      "01:48:17 [INFO] train episode 2048: winner = 0, steps = 8\n",
      "01:48:20 [INFO] train episode 2049: winner = 1, steps = 8\n",
      "01:48:21 [INFO] train episode 2050: winner = 1, steps = 6\n",
      "01:48:21 [INFO] train episode 2051: winner = 1, steps = 6\n",
      "01:48:22 [INFO] train episode 2052: winner = 1, steps = 6\n",
      "01:48:24 [INFO] train episode 2053: winner = 1, steps = 4\n",
      "01:48:26 [INFO] train episode 2054: winner = 1, steps = 4\n",
      "01:48:26 [INFO] train episode 2055: winner = 1, steps = 6\n",
      "01:48:26 [INFO] train episode 2056: winner = -1, steps = 5\n",
      "01:48:27 [INFO] train episode 2057: winner = -1, steps = 7\n",
      "01:48:31 [INFO] train episode 2058: winner = 1, steps = 6\n",
      "01:48:31 [INFO] train episode 2059: winner = 0, steps = 8\n",
      "01:48:34 [INFO] train episode 2060: winner = 1, steps = 6\n",
      "01:48:36 [INFO] train episode 2061: winner = 0, steps = 8\n",
      "01:48:38 [INFO] train episode 2062: winner = 0, steps = 8\n",
      "01:48:38 [INFO] train episode 2063: winner = 1, steps = 4\n",
      "01:48:41 [INFO] train episode 2064: winner = 1, steps = 6\n",
      "01:48:42 [INFO] train episode 2065: winner = 1, steps = 8\n",
      "01:48:46 [INFO] train episode 2066: winner = -1, steps = 7\n",
      "01:48:48 [INFO] train episode 2067: winner = 0, steps = 8\n",
      "01:48:48 [INFO] train episode 2068: winner = -1, steps = 5\n",
      "01:48:49 [INFO] train episode 2069: winner = 1, steps = 4\n",
      "01:48:49 [INFO] train episode 2070: winner = 1, steps = 6\n",
      "01:48:51 [INFO] train episode 2071: winner = -1, steps = 7\n",
      "01:48:52 [INFO] train episode 2072: winner = 1, steps = 6\n",
      "01:48:54 [INFO] train episode 2073: winner = -1, steps = 7\n",
      "01:48:55 [INFO] train episode 2074: winner = 1, steps = 6\n",
      "01:48:56 [INFO] train episode 2075: winner = 1, steps = 8\n",
      "01:48:57 [INFO] train episode 2076: winner = 1, steps = 4\n",
      "01:48:58 [INFO] train episode 2077: winner = 1, steps = 4\n",
      "01:48:58 [INFO] train episode 2078: winner = 1, steps = 4\n",
      "01:48:58 [INFO] train episode 2079: winner = 0, steps = 8\n",
      "01:48:59 [INFO] train episode 2080: winner = -1, steps = 5\n",
      "01:48:59 [INFO] train episode 2081: winner = -1, steps = 7\n",
      "01:48:59 [INFO] train episode 2082: winner = -1, steps = 7\n",
      "01:49:00 [INFO] train episode 2083: winner = 0, steps = 8\n",
      "01:49:01 [INFO] train episode 2084: winner = 1, steps = 8\n",
      "01:49:02 [INFO] train episode 2085: winner = 1, steps = 4\n",
      "01:49:05 [INFO] train episode 2086: winner = 1, steps = 8\n",
      "01:49:05 [INFO] test episode 2086:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:49:14 [INFO] step 0：player 1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "++o\n",
      "01:49:21 [INFO] step 1：player -1, action (0, 1)\n",
      "+x+\n",
      "+++\n",
      "++o\n",
      "01:49:25 [INFO] step 2：player 1, action (0, 2)\n",
      "+xo\n",
      "+++\n",
      "++o\n",
      "01:49:25 [INFO] step 3：player -1, action (1, 2)\n",
      "+xo\n",
      "++x\n",
      "++o\n",
      "01:49:26 [INFO] step 4：player 1, action (1, 1)\n",
      "+xo\n",
      "+ox\n",
      "++o\n",
      "01:49:26 [INFO] step 5：player -1, action (0, 0)\n",
      "xxo\n",
      "+ox\n",
      "++o\n",
      "01:49:27 [INFO] step 6：player 1, action (2, 0)\n",
      "xxo\n",
      "+ox\n",
      "o+o\n",
      "01:49:27 [INFO] test episode 2086: winner = 1, steps = 6\n",
      "01:49:42 [INFO] train episode 2087: winner = 1, steps = 4\n",
      "01:49:56 [INFO] train episode 2088: winner = 0, steps = 8\n",
      "01:50:07 [INFO] train episode 2089: winner = 1, steps = 6\n",
      "01:50:19 [INFO] train episode 2090: winner = 1, steps = 4\n",
      "01:50:28 [INFO] train episode 2091: winner = 1, steps = 4\n",
      "01:50:31 [INFO] train episode 2092: winner = 1, steps = 6\n",
      "01:50:35 [INFO] train episode 2093: winner = 1, steps = 6\n",
      "01:50:46 [INFO] train episode 2094: winner = 1, steps = 6\n",
      "01:50:46 [INFO] train episode 2095: winner = 1, steps = 6\n",
      "01:50:48 [INFO] train episode 2096: winner = 1, steps = 6\n",
      "01:50:50 [INFO] train episode 2097: winner = 1, steps = 6\n",
      "01:50:54 [INFO] train episode 2098: winner = 0, steps = 8\n",
      "01:50:58 [INFO] train episode 2099: winner = 1, steps = 6\n",
      "01:51:04 [INFO] train episode 2100: winner = 1, steps = 6\n",
      "01:51:11 [INFO] train episode 2101: winner = 0, steps = 8\n",
      "01:51:13 [INFO] train episode 2102: winner = 1, steps = 4\n",
      "01:51:16 [INFO] train episode 2103: winner = 1, steps = 4\n",
      "01:51:22 [INFO] train episode 2104: winner = 1, steps = 4\n",
      "01:51:23 [INFO] train episode 2105: winner = 1, steps = 4\n",
      "01:51:25 [INFO] train episode 2106: winner = 1, steps = 4\n",
      "01:51:27 [INFO] train episode 2107: winner = 1, steps = 6\n",
      "01:51:30 [INFO] train episode 2108: winner = -1, steps = 5\n",
      "01:51:30 [INFO] train episode 2109: winner = 1, steps = 4\n",
      "01:51:30 [INFO] train episode 2110: winner = 1, steps = 6\n",
      "01:51:43 [INFO] train episode 2111: winner = 1, steps = 6\n",
      "01:51:43 [INFO] train episode 2112: winner = 1, steps = 4\n",
      "01:51:56 [INFO] train episode 2113: winner = 0, steps = 8\n",
      "01:52:00 [INFO] train episode 2114: winner = 1, steps = 4\n",
      "01:52:04 [INFO] train episode 2115: winner = 0, steps = 8\n",
      "01:52:06 [INFO] train episode 2116: winner = 1, steps = 6\n",
      "01:52:11 [INFO] train episode 2117: winner = 0, steps = 8\n",
      "01:52:15 [INFO] train episode 2118: winner = 0, steps = 8\n",
      "01:52:17 [INFO] train episode 2119: winner = 1, steps = 6\n",
      "01:52:21 [INFO] train episode 2120: winner = 1, steps = 6\n",
      "01:52:26 [INFO] train episode 2121: winner = 0, steps = 8\n",
      "01:52:27 [INFO] train episode 2122: winner = 1, steps = 4\n",
      "01:52:33 [INFO] train episode 2123: winner = 0, steps = 8\n",
      "01:52:33 [INFO] train episode 2124: winner = 1, steps = 4\n",
      "01:52:36 [INFO] train episode 2125: winner = 1, steps = 6\n",
      "01:52:37 [INFO] train episode 2126: winner = 1, steps = 6\n",
      "01:52:41 [INFO] train episode 2127: winner = 0, steps = 8\n",
      "01:52:41 [INFO] train episode 2128: winner = 1, steps = 4\n",
      "01:52:43 [INFO] train episode 2129: winner = 1, steps = 4\n",
      "01:52:44 [INFO] train episode 2130: winner = 1, steps = 6\n",
      "01:52:45 [INFO] train episode 2131: winner = 1, steps = 6\n",
      "01:52:46 [INFO] train episode 2132: winner = 1, steps = 8\n",
      "01:52:48 [INFO] train episode 2133: winner = 1, steps = 4\n",
      "01:52:50 [INFO] train episode 2134: winner = 1, steps = 4\n",
      "01:52:51 [INFO] train episode 2135: winner = 1, steps = 6\n",
      "01:52:54 [INFO] train episode 2136: winner = 1, steps = 4\n",
      "01:52:58 [INFO] train episode 2137: winner = 1, steps = 4\n",
      "01:52:59 [INFO] train episode 2138: winner = 1, steps = 4\n",
      "01:53:02 [INFO] train episode 2139: winner = 1, steps = 6\n",
      "01:53:09 [INFO] train episode 2140: winner = 0, steps = 8\n",
      "01:53:12 [INFO] train episode 2141: winner = 1, steps = 6\n",
      "01:53:14 [INFO] train episode 2142: winner = -1, steps = 7\n",
      "01:53:16 [INFO] train episode 2143: winner = -1, steps = 5\n",
      "01:53:17 [INFO] train episode 2144: winner = 1, steps = 6\n",
      "01:53:18 [INFO] train episode 2145: winner = 0, steps = 8\n",
      "01:53:19 [INFO] train episode 2146: winner = 1, steps = 6\n",
      "01:53:25 [INFO] train episode 2147: winner = -1, steps = 7\n",
      "01:53:28 [INFO] train episode 2148: winner = 1, steps = 6\n",
      "01:53:29 [INFO] train episode 2149: winner = 1, steps = 8\n",
      "01:53:31 [INFO] train episode 2150: winner = -1, steps = 7\n",
      "01:53:38 [INFO] train episode 2151: winner = 0, steps = 8\n",
      "01:53:39 [INFO] train episode 2152: winner = 0, steps = 8\n",
      "01:53:39 [INFO] train episode 2153: winner = 1, steps = 4\n",
      "01:53:45 [INFO] train episode 2154: winner = 0, steps = 8\n",
      "01:53:47 [INFO] train episode 2155: winner = 1, steps = 6\n",
      "01:53:50 [INFO] train episode 2156: winner = 0, steps = 8\n",
      "01:53:52 [INFO] train episode 2157: winner = 1, steps = 6\n",
      "01:53:53 [INFO] train episode 2158: winner = -1, steps = 5\n",
      "01:53:53 [INFO] train episode 2159: winner = 1, steps = 6\n",
      "01:54:01 [INFO] train episode 2160: winner = 0, steps = 8\n",
      "01:54:02 [INFO] train episode 2161: winner = 1, steps = 6\n",
      "01:54:03 [INFO] train episode 2162: winner = 0, steps = 8\n",
      "01:54:05 [INFO] train episode 2163: winner = -1, steps = 7\n",
      "01:54:07 [INFO] train episode 2164: winner = 0, steps = 8\n",
      "01:54:07 [INFO] train episode 2165: winner = 1, steps = 6\n",
      "01:54:09 [INFO] train episode 2166: winner = 1, steps = 6\n",
      "01:54:09 [INFO] train episode 2167: winner = 1, steps = 4\n",
      "01:54:10 [INFO] train episode 2168: winner = 1, steps = 4\n",
      "01:54:10 [INFO] train episode 2169: winner = 1, steps = 6\n",
      "01:54:14 [INFO] train episode 2170: winner = 0, steps = 8\n",
      "01:54:17 [INFO] train episode 2171: winner = 1, steps = 6\n",
      "01:54:20 [INFO] train episode 2172: winner = -1, steps = 5\n",
      "01:54:20 [INFO] train episode 2173: winner = 0, steps = 8\n",
      "01:54:24 [INFO] train episode 2174: winner = 0, steps = 8\n",
      "01:54:26 [INFO] train episode 2175: winner = 1, steps = 4\n",
      "01:54:26 [INFO] train episode 2176: winner = 1, steps = 6\n",
      "01:54:27 [INFO] train episode 2177: winner = 0, steps = 8\n",
      "01:54:28 [INFO] train episode 2178: winner = 1, steps = 4\n",
      "01:54:31 [INFO] train episode 2179: winner = 0, steps = 8\n",
      "01:54:32 [INFO] train episode 2180: winner = 1, steps = 8\n",
      "01:54:33 [INFO] train episode 2181: winner = 0, steps = 8\n",
      "01:54:35 [INFO] train episode 2182: winner = 0, steps = 8\n",
      "01:54:39 [INFO] train episode 2183: winner = -1, steps = 5\n",
      "01:54:39 [INFO] train episode 2184: winner = 1, steps = 6\n",
      "01:54:40 [INFO] train episode 2185: winner = 0, steps = 8\n",
      "01:54:42 [INFO] train episode 2186: winner = -1, steps = 5\n",
      "01:54:44 [INFO] train episode 2187: winner = 0, steps = 8\n",
      "01:54:46 [INFO] train episode 2188: winner = 0, steps = 8\n",
      "01:54:48 [INFO] train episode 2189: winner = 0, steps = 8\n",
      "01:54:49 [INFO] train episode 2190: winner = 1, steps = 8\n",
      "01:54:50 [INFO] train episode 2191: winner = 1, steps = 4\n",
      "01:54:51 [INFO] train episode 2192: winner = 1, steps = 4\n",
      "01:54:51 [INFO] train episode 2193: winner = 1, steps = 6\n",
      "01:54:52 [INFO] train episode 2194: winner = -1, steps = 7\n",
      "01:54:53 [INFO] train episode 2195: winner = 1, steps = 6\n",
      "01:54:56 [INFO] train episode 2196: winner = 0, steps = 8\n",
      "01:54:57 [INFO] train episode 2197: winner = 0, steps = 8\n",
      "01:55:00 [INFO] train episode 2198: winner = 0, steps = 8\n",
      "01:55:01 [INFO] train episode 2199: winner = 1, steps = 4\n",
      "01:55:03 [INFO] train episode 2200: winner = 1, steps = 6\n",
      "01:55:05 [INFO] train episode 2201: winner = 0, steps = 8\n",
      "01:55:07 [INFO] train episode 2202: winner = 1, steps = 8\n",
      "01:55:08 [INFO] train episode 2203: winner = 0, steps = 8\n",
      "01:55:09 [INFO] train episode 2204: winner = -1, steps = 7\n",
      "01:55:11 [INFO] train episode 2205: winner = 0, steps = 8\n",
      "01:55:14 [INFO] train episode 2206: winner = 1, steps = 6\n",
      "01:55:14 [INFO] train episode 2207: winner = 1, steps = 6\n",
      "01:55:15 [INFO] train episode 2208: winner = 1, steps = 6\n",
      "01:55:15 [INFO] train episode 2209: winner = 1, steps = 6\n",
      "01:55:17 [INFO] train episode 2210: winner = 0, steps = 8\n",
      "01:55:17 [INFO] train episode 2211: winner = 1, steps = 6\n",
      "01:55:19 [INFO] train episode 2212: winner = 0, steps = 8\n",
      "01:55:20 [INFO] train episode 2213: winner = 1, steps = 4\n",
      "01:55:20 [INFO] train episode 2214: winner = 0, steps = 8\n",
      "01:55:20 [INFO] train episode 2215: winner = 0, steps = 8\n",
      "01:55:20 [INFO] train episode 2216: winner = -1, steps = 7\n",
      "01:55:20 [INFO] train episode 2217: winner = 1, steps = 4\n",
      "01:55:22 [INFO] train episode 2218: winner = 0, steps = 8\n",
      "01:55:23 [INFO] train episode 2219: winner = 0, steps = 8\n",
      "01:55:24 [INFO] train episode 2220: winner = -1, steps = 5\n",
      "01:55:27 [INFO] train episode 2221: winner = 1, steps = 8\n",
      "01:55:28 [INFO] train episode 2222: winner = 1, steps = 6\n",
      "01:55:30 [INFO] train episode 2223: winner = -1, steps = 7\n",
      "01:55:30 [INFO] train episode 2224: winner = 0, steps = 8\n",
      "01:55:30 [INFO] test episode 2224:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:55:39 [INFO] step 0：player 1, action (0, 2)\n",
      "++o\n",
      "+++\n",
      "+++\n",
      "01:55:47 [INFO] step 1：player -1, action (1, 0)\n",
      "++o\n",
      "x++\n",
      "+++\n",
      "01:55:51 [INFO] step 2：player 1, action (0, 0)\n",
      "o+o\n",
      "x++\n",
      "+++\n",
      "01:55:52 [INFO] step 3：player -1, action (0, 1)\n",
      "oxo\n",
      "x++\n",
      "+++\n",
      "01:55:53 [INFO] step 4：player 1, action (2, 2)\n",
      "oxo\n",
      "x++\n",
      "++o\n",
      "01:55:53 [INFO] step 5：player -1, action (2, 1)\n",
      "oxo\n",
      "x++\n",
      "+xo\n",
      "01:55:53 [INFO] step 6：player 1, action (1, 2)\n",
      "oxo\n",
      "x+o\n",
      "+xo\n",
      "01:55:53 [INFO] test episode 2224: winner = 1, steps = 6\n",
      "01:56:05 [INFO] train episode 2225: winner = 1, steps = 4\n",
      "01:56:19 [INFO] train episode 2226: winner = 0, steps = 8\n",
      "01:56:29 [INFO] train episode 2227: winner = 1, steps = 4\n",
      "01:56:33 [INFO] train episode 2228: winner = 1, steps = 6\n",
      "01:56:41 [INFO] train episode 2229: winner = 0, steps = 8\n",
      "01:56:58 [INFO] train episode 2230: winner = 0, steps = 8\n",
      "01:57:07 [INFO] train episode 2231: winner = 1, steps = 4\n",
      "01:57:12 [INFO] train episode 2232: winner = 0, steps = 8\n",
      "01:57:19 [INFO] train episode 2233: winner = -1, steps = 5\n",
      "01:57:26 [INFO] train episode 2234: winner = 0, steps = 8\n",
      "01:57:29 [INFO] train episode 2235: winner = 1, steps = 6\n",
      "01:57:33 [INFO] train episode 2236: winner = 1, steps = 6\n",
      "01:57:36 [INFO] train episode 2237: winner = 1, steps = 6\n",
      "01:57:41 [INFO] train episode 2238: winner = 0, steps = 8\n",
      "01:57:43 [INFO] train episode 2239: winner = 1, steps = 4\n",
      "01:57:52 [INFO] train episode 2240: winner = -1, steps = 5\n",
      "01:57:56 [INFO] train episode 2241: winner = 0, steps = 8\n",
      "01:58:07 [INFO] train episode 2242: winner = -1, steps = 5\n",
      "01:58:16 [INFO] train episode 2243: winner = 0, steps = 8\n",
      "01:58:17 [INFO] train episode 2244: winner = 1, steps = 6\n",
      "01:58:20 [INFO] train episode 2245: winner = 1, steps = 6\n",
      "01:58:22 [INFO] train episode 2246: winner = 0, steps = 8\n",
      "01:58:25 [INFO] train episode 2247: winner = 1, steps = 6\n",
      "01:58:29 [INFO] train episode 2248: winner = 0, steps = 8\n",
      "01:58:29 [INFO] train episode 2249: winner = 1, steps = 6\n",
      "01:58:31 [INFO] train episode 2250: winner = 0, steps = 8\n",
      "01:58:33 [INFO] train episode 2251: winner = 1, steps = 6\n",
      "01:58:35 [INFO] train episode 2252: winner = 1, steps = 4\n",
      "01:58:38 [INFO] train episode 2253: winner = 1, steps = 6\n",
      "01:58:47 [INFO] train episode 2254: winner = 0, steps = 8\n",
      "01:58:47 [INFO] train episode 2255: winner = -1, steps = 5\n",
      "01:58:50 [INFO] train episode 2256: winner = -1, steps = 7\n",
      "01:58:54 [INFO] train episode 2257: winner = 1, steps = 6\n",
      "01:58:56 [INFO] train episode 2258: winner = -1, steps = 5\n",
      "01:59:00 [INFO] train episode 2259: winner = 1, steps = 8\n",
      "01:59:03 [INFO] train episode 2260: winner = 0, steps = 8\n",
      "01:59:03 [INFO] train episode 2261: winner = 1, steps = 6\n",
      "01:59:07 [INFO] train episode 2262: winner = 0, steps = 8\n",
      "01:59:12 [INFO] train episode 2263: winner = 0, steps = 8\n",
      "01:59:13 [INFO] train episode 2264: winner = -1, steps = 5\n",
      "01:59:19 [INFO] train episode 2265: winner = 0, steps = 8\n",
      "01:59:20 [INFO] train episode 2266: winner = 0, steps = 8\n",
      "01:59:22 [INFO] train episode 2267: winner = 0, steps = 8\n",
      "01:59:25 [INFO] train episode 2268: winner = 1, steps = 6\n",
      "01:59:25 [INFO] train episode 2269: winner = 1, steps = 4\n",
      "01:59:25 [INFO] train episode 2270: winner = 1, steps = 4\n",
      "01:59:25 [INFO] train episode 2271: winner = 0, steps = 8\n",
      "01:59:26 [INFO] train episode 2272: winner = 1, steps = 6\n",
      "01:59:26 [INFO] train episode 2273: winner = 1, steps = 6\n",
      "01:59:31 [INFO] train episode 2274: winner = 1, steps = 6\n",
      "01:59:37 [INFO] train episode 2275: winner = 0, steps = 8\n",
      "01:59:38 [INFO] train episode 2276: winner = 0, steps = 8\n",
      "01:59:39 [INFO] train episode 2277: winner = 1, steps = 6\n",
      "01:59:39 [INFO] train episode 2278: winner = 1, steps = 6\n",
      "01:59:39 [INFO] train episode 2279: winner = 1, steps = 6\n",
      "01:59:40 [INFO] train episode 2280: winner = 0, steps = 8\n",
      "01:59:41 [INFO] train episode 2281: winner = 0, steps = 8\n",
      "01:59:46 [INFO] train episode 2282: winner = 1, steps = 8\n",
      "01:59:50 [INFO] train episode 2283: winner = 1, steps = 8\n",
      "01:59:50 [INFO] train episode 2284: winner = 1, steps = 4\n",
      "01:59:52 [INFO] train episode 2285: winner = 1, steps = 4\n",
      "01:59:54 [INFO] train episode 2286: winner = 0, steps = 8\n",
      "01:59:54 [INFO] train episode 2287: winner = 1, steps = 4\n",
      "01:59:56 [INFO] train episode 2288: winner = 1, steps = 6\n",
      "01:59:58 [INFO] train episode 2289: winner = 1, steps = 4\n",
      "02:00:03 [INFO] train episode 2290: winner = -1, steps = 7\n",
      "02:00:05 [INFO] train episode 2291: winner = 1, steps = 8\n",
      "02:00:05 [INFO] train episode 2292: winner = 1, steps = 6\n",
      "02:00:05 [INFO] train episode 2293: winner = 1, steps = 6\n",
      "02:00:07 [INFO] train episode 2294: winner = 1, steps = 6\n",
      "02:00:08 [INFO] train episode 2295: winner = 1, steps = 6\n",
      "02:00:08 [INFO] train episode 2296: winner = 1, steps = 4\n",
      "02:00:08 [INFO] train episode 2297: winner = -1, steps = 5\n",
      "02:00:09 [INFO] train episode 2298: winner = 1, steps = 6\n",
      "02:00:10 [INFO] train episode 2299: winner = 0, steps = 8\n",
      "02:00:11 [INFO] train episode 2300: winner = 0, steps = 8\n",
      "02:00:12 [INFO] train episode 2301: winner = 1, steps = 4\n",
      "02:00:14 [INFO] train episode 2302: winner = 1, steps = 6\n",
      "02:00:14 [INFO] train episode 2303: winner = 1, steps = 6\n",
      "02:00:15 [INFO] train episode 2304: winner = 1, steps = 4\n",
      "02:00:18 [INFO] train episode 2305: winner = 0, steps = 8\n",
      "02:00:19 [INFO] train episode 2306: winner = 1, steps = 6\n",
      "02:00:19 [INFO] train episode 2307: winner = 1, steps = 4\n",
      "02:00:19 [INFO] train episode 2308: winner = 1, steps = 4\n",
      "02:00:26 [INFO] train episode 2309: winner = 0, steps = 8\n",
      "02:00:26 [INFO] train episode 2310: winner = 0, steps = 8\n",
      "02:00:28 [INFO] train episode 2311: winner = 0, steps = 8\n",
      "02:00:28 [INFO] train episode 2312: winner = 1, steps = 6\n",
      "02:00:31 [INFO] train episode 2313: winner = 0, steps = 8\n",
      "02:00:33 [INFO] train episode 2314: winner = 1, steps = 6\n",
      "02:00:33 [INFO] train episode 2315: winner = 0, steps = 8\n",
      "02:00:33 [INFO] train episode 2316: winner = 1, steps = 4\n",
      "02:00:34 [INFO] train episode 2317: winner = 0, steps = 8\n",
      "02:00:36 [INFO] train episode 2318: winner = 0, steps = 8\n",
      "02:00:37 [INFO] train episode 2319: winner = 1, steps = 6\n",
      "02:00:40 [INFO] train episode 2320: winner = 0, steps = 8\n",
      "02:00:40 [INFO] train episode 2321: winner = 1, steps = 6\n",
      "02:00:40 [INFO] train episode 2322: winner = 1, steps = 6\n",
      "02:00:42 [INFO] train episode 2323: winner = 0, steps = 8\n",
      "02:00:42 [INFO] train episode 2324: winner = 1, steps = 4\n",
      "02:00:47 [INFO] train episode 2325: winner = 0, steps = 8\n",
      "02:00:49 [INFO] train episode 2326: winner = 1, steps = 4\n",
      "02:00:49 [INFO] train episode 2327: winner = 0, steps = 8\n",
      "02:00:49 [INFO] train episode 2328: winner = 1, steps = 6\n",
      "02:00:52 [INFO] train episode 2329: winner = 1, steps = 6\n",
      "02:00:53 [INFO] train episode 2330: winner = -1, steps = 5\n",
      "02:00:54 [INFO] train episode 2331: winner = 1, steps = 4\n",
      "02:00:55 [INFO] train episode 2332: winner = 1, steps = 4\n",
      "02:00:56 [INFO] train episode 2333: winner = 1, steps = 6\n",
      "02:00:57 [INFO] train episode 2334: winner = -1, steps = 7\n",
      "02:00:58 [INFO] train episode 2335: winner = -1, steps = 7\n",
      "02:00:59 [INFO] train episode 2336: winner = 1, steps = 6\n",
      "02:01:01 [INFO] train episode 2337: winner = 1, steps = 6\n",
      "02:01:04 [INFO] train episode 2338: winner = -1, steps = 5\n",
      "02:01:04 [INFO] train episode 2339: winner = 0, steps = 8\n",
      "02:01:07 [INFO] train episode 2340: winner = 1, steps = 6\n",
      "02:01:10 [INFO] train episode 2341: winner = 0, steps = 8\n",
      "02:01:11 [INFO] train episode 2342: winner = 0, steps = 8\n",
      "02:01:12 [INFO] train episode 2343: winner = -1, steps = 5\n",
      "02:01:12 [INFO] train episode 2344: winner = 1, steps = 6\n",
      "02:01:14 [INFO] train episode 2345: winner = 0, steps = 8\n",
      "02:01:14 [INFO] train episode 2346: winner = 0, steps = 8\n",
      "02:01:16 [INFO] train episode 2347: winner = 0, steps = 8\n",
      "02:01:17 [INFO] train episode 2348: winner = 0, steps = 8\n",
      "02:01:17 [INFO] train episode 2349: winner = 1, steps = 4\n",
      "02:01:19 [INFO] train episode 2350: winner = 1, steps = 6\n",
      "02:01:21 [INFO] train episode 2351: winner = 1, steps = 6\n",
      "02:01:21 [INFO] train episode 2352: winner = 1, steps = 4\n",
      "02:01:22 [INFO] train episode 2353: winner = 1, steps = 6\n",
      "02:01:23 [INFO] train episode 2354: winner = 1, steps = 4\n",
      "02:01:25 [INFO] train episode 2355: winner = 0, steps = 8\n",
      "02:01:29 [INFO] train episode 2356: winner = 1, steps = 8\n",
      "02:01:30 [INFO] train episode 2357: winner = 0, steps = 8\n",
      "02:01:32 [INFO] train episode 2358: winner = 0, steps = 8\n",
      "02:01:33 [INFO] train episode 2359: winner = 0, steps = 8\n",
      "02:01:36 [INFO] train episode 2360: winner = 0, steps = 8\n",
      "02:01:36 [INFO] test episode 2360:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:01:45 [INFO] step 0：player 1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "++o\n",
      "02:01:53 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "+++\n",
      "++o\n",
      "02:01:58 [INFO] step 2：player 1, action (1, 1)\n",
      "++x\n",
      "+o+\n",
      "++o\n",
      "02:01:59 [INFO] step 3：player -1, action (0, 0)\n",
      "x+x\n",
      "+o+\n",
      "++o\n",
      "02:01:59 [INFO] step 4：player 1, action (0, 1)\n",
      "xox\n",
      "+o+\n",
      "++o\n",
      "02:01:59 [INFO] step 5：player -1, action (2, 1)\n",
      "xox\n",
      "+o+\n",
      "+xo\n",
      "02:01:59 [INFO] step 6：player 1, action (1, 2)\n",
      "xox\n",
      "+oo\n",
      "+xo\n",
      "02:02:00 [INFO] step 7：player -1, action (1, 0)\n",
      "xox\n",
      "xoo\n",
      "+xo\n",
      "02:02:00 [INFO] step 8：player 1, action (2, 0)\n",
      "xox\n",
      "xoo\n",
      "oxo\n",
      "02:02:00 [INFO] test episode 2360: winner = 0, steps = 8\n",
      "02:02:14 [INFO] train episode 2361: winner = 0, steps = 8\n",
      "02:02:33 [INFO] train episode 2362: winner = 0, steps = 8\n",
      "02:02:48 [INFO] train episode 2363: winner = 1, steps = 8\n",
      "02:02:55 [INFO] train episode 2364: winner = -1, steps = 7\n",
      "02:03:08 [INFO] train episode 2365: winner = 0, steps = 8\n",
      "02:03:09 [INFO] train episode 2366: winner = 0, steps = 8\n",
      "02:03:21 [INFO] train episode 2367: winner = 0, steps = 8\n",
      "02:03:28 [INFO] train episode 2368: winner = 0, steps = 8\n",
      "02:03:37 [INFO] train episode 2369: winner = 1, steps = 4\n",
      "02:03:44 [INFO] train episode 2370: winner = -1, steps = 5\n",
      "02:03:49 [INFO] train episode 2371: winner = 0, steps = 8\n",
      "02:03:51 [INFO] train episode 2372: winner = -1, steps = 5\n",
      "02:04:01 [INFO] train episode 2373: winner = 0, steps = 8\n",
      "02:04:09 [INFO] train episode 2374: winner = 0, steps = 8\n",
      "02:04:13 [INFO] train episode 2375: winner = 0, steps = 8\n",
      "02:04:22 [INFO] train episode 2376: winner = 0, steps = 8\n",
      "02:04:29 [INFO] train episode 2377: winner = 1, steps = 6\n",
      "02:04:36 [INFO] train episode 2378: winner = 0, steps = 8\n",
      "02:04:40 [INFO] train episode 2379: winner = 1, steps = 4\n",
      "02:04:42 [INFO] train episode 2380: winner = 0, steps = 8\n",
      "02:04:46 [INFO] train episode 2381: winner = 0, steps = 8\n",
      "02:04:46 [INFO] train episode 2382: winner = 1, steps = 6\n",
      "02:04:49 [INFO] train episode 2383: winner = 1, steps = 6\n",
      "02:04:52 [INFO] train episode 2384: winner = 0, steps = 8\n",
      "02:04:57 [INFO] train episode 2385: winner = 1, steps = 6\n",
      "02:05:01 [INFO] train episode 2386: winner = 1, steps = 6\n",
      "02:05:01 [INFO] train episode 2387: winner = 0, steps = 8\n",
      "02:05:04 [INFO] train episode 2388: winner = 1, steps = 6\n",
      "02:05:06 [INFO] train episode 2389: winner = 0, steps = 8\n",
      "02:05:09 [INFO] train episode 2390: winner = -1, steps = 5\n",
      "02:05:12 [INFO] train episode 2391: winner = 1, steps = 6\n",
      "02:05:14 [INFO] train episode 2392: winner = 0, steps = 8\n",
      "02:05:15 [INFO] train episode 2393: winner = 0, steps = 8\n",
      "02:05:15 [INFO] train episode 2394: winner = 0, steps = 8\n",
      "02:05:20 [INFO] train episode 2395: winner = 0, steps = 8\n",
      "02:05:25 [INFO] train episode 2396: winner = 0, steps = 8\n",
      "02:05:25 [INFO] train episode 2397: winner = 0, steps = 8\n",
      "02:05:25 [INFO] train episode 2398: winner = 1, steps = 4\n",
      "02:05:29 [INFO] train episode 2399: winner = 0, steps = 8\n",
      "02:05:29 [INFO] train episode 2400: winner = -1, steps = 5\n",
      "02:05:29 [INFO] train episode 2401: winner = 1, steps = 4\n",
      "02:05:32 [INFO] train episode 2402: winner = 1, steps = 4\n",
      "02:05:33 [INFO] train episode 2403: winner = 1, steps = 4\n",
      "02:05:37 [INFO] train episode 2404: winner = 1, steps = 6\n",
      "02:05:41 [INFO] train episode 2405: winner = 0, steps = 8\n",
      "02:05:43 [INFO] train episode 2406: winner = 1, steps = 6\n",
      "02:05:44 [INFO] train episode 2407: winner = 1, steps = 6\n",
      "02:05:49 [INFO] train episode 2408: winner = 1, steps = 8\n",
      "02:05:50 [INFO] train episode 2409: winner = 0, steps = 8\n",
      "02:05:51 [INFO] train episode 2410: winner = 1, steps = 6\n",
      "02:05:53 [INFO] train episode 2411: winner = 1, steps = 4\n",
      "02:05:59 [INFO] train episode 2412: winner = 1, steps = 8\n",
      "02:06:02 [INFO] train episode 2413: winner = 0, steps = 8\n",
      "02:06:05 [INFO] train episode 2414: winner = 0, steps = 8\n",
      "02:06:07 [INFO] train episode 2415: winner = 1, steps = 4\n",
      "02:06:11 [INFO] train episode 2416: winner = 0, steps = 8\n",
      "02:06:14 [INFO] train episode 2417: winner = 0, steps = 8\n",
      "02:06:15 [INFO] train episode 2418: winner = 1, steps = 4\n",
      "02:06:17 [INFO] train episode 2419: winner = 0, steps = 8\n",
      "02:06:22 [INFO] train episode 2420: winner = 1, steps = 6\n",
      "02:06:22 [INFO] train episode 2421: winner = 0, steps = 8\n",
      "02:06:22 [INFO] train episode 2422: winner = 0, steps = 8\n",
      "02:06:24 [INFO] train episode 2423: winner = 1, steps = 4\n",
      "02:06:26 [INFO] train episode 2424: winner = 1, steps = 6\n",
      "02:06:28 [INFO] train episode 2425: winner = 1, steps = 6\n",
      "02:06:29 [INFO] train episode 2426: winner = 1, steps = 8\n",
      "02:06:32 [INFO] train episode 2427: winner = 0, steps = 8\n",
      "02:06:32 [INFO] train episode 2428: winner = 0, steps = 8\n",
      "02:06:33 [INFO] train episode 2429: winner = 1, steps = 6\n",
      "02:06:34 [INFO] train episode 2430: winner = 1, steps = 4\n",
      "02:06:37 [INFO] train episode 2431: winner = 0, steps = 8\n",
      "02:06:39 [INFO] train episode 2432: winner = 1, steps = 6\n",
      "02:06:42 [INFO] train episode 2433: winner = 1, steps = 8\n",
      "02:06:44 [INFO] train episode 2434: winner = -1, steps = 5\n",
      "02:06:44 [INFO] train episode 2435: winner = -1, steps = 7\n",
      "02:06:44 [INFO] train episode 2436: winner = 1, steps = 4\n",
      "02:06:44 [INFO] train episode 2437: winner = 0, steps = 8\n",
      "02:06:46 [INFO] train episode 2438: winner = 1, steps = 6\n",
      "02:06:47 [INFO] train episode 2439: winner = 1, steps = 4\n",
      "02:06:48 [INFO] train episode 2440: winner = -1, steps = 7\n",
      "02:06:50 [INFO] train episode 2441: winner = 1, steps = 4\n",
      "02:06:52 [INFO] train episode 2442: winner = 0, steps = 8\n",
      "02:06:55 [INFO] train episode 2443: winner = 0, steps = 8\n",
      "02:06:55 [INFO] train episode 2444: winner = 1, steps = 6\n",
      "02:07:02 [INFO] train episode 2445: winner = 0, steps = 8\n",
      "02:07:04 [INFO] train episode 2446: winner = 1, steps = 8\n",
      "02:07:04 [INFO] train episode 2447: winner = 0, steps = 8\n",
      "02:07:04 [INFO] train episode 2448: winner = 0, steps = 8\n",
      "02:07:04 [INFO] train episode 2449: winner = 1, steps = 6\n",
      "02:07:04 [INFO] train episode 2450: winner = 1, steps = 6\n",
      "02:07:04 [INFO] train episode 2451: winner = 0, steps = 8\n",
      "02:07:05 [INFO] train episode 2452: winner = 1, steps = 4\n",
      "02:07:05 [INFO] train episode 2453: winner = 0, steps = 8\n",
      "02:07:05 [INFO] train episode 2454: winner = 1, steps = 6\n",
      "02:07:06 [INFO] train episode 2455: winner = 1, steps = 6\n",
      "02:07:07 [INFO] train episode 2456: winner = -1, steps = 7\n",
      "02:07:09 [INFO] train episode 2457: winner = 1, steps = 6\n",
      "02:07:10 [INFO] train episode 2458: winner = 1, steps = 6\n",
      "02:07:12 [INFO] train episode 2459: winner = -1, steps = 7\n",
      "02:07:14 [INFO] train episode 2460: winner = 1, steps = 8\n",
      "02:07:16 [INFO] train episode 2461: winner = 1, steps = 4\n",
      "02:07:17 [INFO] train episode 2462: winner = 0, steps = 8\n",
      "02:07:17 [INFO] train episode 2463: winner = 0, steps = 8\n",
      "02:07:18 [INFO] train episode 2464: winner = 0, steps = 8\n",
      "02:07:21 [INFO] train episode 2465: winner = 0, steps = 8\n",
      "02:07:21 [INFO] train episode 2466: winner = 1, steps = 6\n",
      "02:07:22 [INFO] train episode 2467: winner = 1, steps = 4\n",
      "02:07:26 [INFO] train episode 2468: winner = 0, steps = 8\n",
      "02:07:27 [INFO] train episode 2469: winner = 0, steps = 8\n",
      "02:07:30 [INFO] train episode 2470: winner = 0, steps = 8\n",
      "02:07:33 [INFO] train episode 2471: winner = 1, steps = 8\n",
      "02:07:36 [INFO] train episode 2472: winner = 0, steps = 8\n",
      "02:07:37 [INFO] train episode 2473: winner = -1, steps = 5\n",
      "02:07:37 [INFO] train episode 2474: winner = 1, steps = 4\n",
      "02:07:41 [INFO] train episode 2475: winner = 0, steps = 8\n",
      "02:07:41 [INFO] train episode 2476: winner = 0, steps = 8\n",
      "02:07:42 [INFO] train episode 2477: winner = 1, steps = 6\n",
      "02:07:43 [INFO] train episode 2478: winner = 0, steps = 8\n",
      "02:07:45 [INFO] train episode 2479: winner = 0, steps = 8\n",
      "02:07:45 [INFO] train episode 2480: winner = -1, steps = 5\n",
      "02:07:45 [INFO] train episode 2481: winner = 1, steps = 4\n",
      "02:07:45 [INFO] train episode 2482: winner = 1, steps = 4\n",
      "02:07:46 [INFO] train episode 2483: winner = 0, steps = 8\n",
      "02:07:47 [INFO] train episode 2484: winner = 1, steps = 4\n",
      "02:07:50 [INFO] train episode 2485: winner = -1, steps = 7\n",
      "02:07:50 [INFO] train episode 2486: winner = 1, steps = 4\n",
      "02:07:52 [INFO] train episode 2487: winner = -1, steps = 7\n",
      "02:07:53 [INFO] train episode 2488: winner = 0, steps = 8\n",
      "02:07:56 [INFO] train episode 2489: winner = 0, steps = 8\n",
      "02:07:58 [INFO] train episode 2490: winner = 1, steps = 6\n",
      "02:07:58 [INFO] test episode 2490:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:08:08 [INFO] step 0：player 1, action (0, 2)\n",
      "++o\n",
      "+++\n",
      "+++\n",
      "02:08:15 [INFO] step 1：player -1, action (1, 2)\n",
      "++o\n",
      "++x\n",
      "+++\n",
      "02:08:17 [INFO] step 2：player 1, action (2, 2)\n",
      "++o\n",
      "++x\n",
      "++o\n",
      "02:08:21 [INFO] step 3：player -1, action (1, 1)\n",
      "++o\n",
      "+xx\n",
      "++o\n",
      "02:08:22 [INFO] step 4：player 1, action (1, 0)\n",
      "++o\n",
      "oxx\n",
      "++o\n",
      "02:08:22 [INFO] step 5：player -1, action (2, 0)\n",
      "++o\n",
      "oxx\n",
      "x+o\n",
      "02:08:23 [INFO] step 6：player 1, action (2, 1)\n",
      "++o\n",
      "oxx\n",
      "xoo\n",
      "02:08:23 [INFO] step 7：player -1, action (0, 1)\n",
      "+xo\n",
      "oxx\n",
      "xoo\n",
      "02:08:23 [INFO] step 8：player 1, action (0, 0)\n",
      "oxo\n",
      "oxx\n",
      "xoo\n",
      "02:08:23 [INFO] test episode 2490: winner = 0, steps = 8\n",
      "02:08:38 [INFO] train episode 2491: winner = 1, steps = 4\n",
      "02:08:43 [INFO] train episode 2492: winner = 1, steps = 4\n",
      "02:09:00 [INFO] train episode 2493: winner = 0, steps = 8\n",
      "02:09:12 [INFO] train episode 2494: winner = 0, steps = 8\n",
      "02:09:20 [INFO] train episode 2495: winner = 1, steps = 4\n",
      "02:09:34 [INFO] train episode 2496: winner = -1, steps = 7\n",
      "02:09:35 [INFO] train episode 2497: winner = 1, steps = 4\n",
      "02:09:38 [INFO] train episode 2498: winner = 1, steps = 4\n",
      "02:09:45 [INFO] train episode 2499: winner = -1, steps = 7\n",
      "02:09:51 [INFO] train episode 2500: winner = 1, steps = 4\n",
      "02:10:00 [INFO] train episode 2501: winner = 1, steps = 4\n",
      "02:10:07 [INFO] train episode 2502: winner = 1, steps = 6\n",
      "02:10:13 [INFO] train episode 2503: winner = 0, steps = 8\n",
      "02:10:21 [INFO] train episode 2504: winner = -1, steps = 7\n",
      "02:10:21 [INFO] train episode 2505: winner = 1, steps = 4\n",
      "02:10:22 [INFO] train episode 2506: winner = -1, steps = 5\n",
      "02:10:23 [INFO] train episode 2507: winner = 1, steps = 6\n",
      "02:10:27 [INFO] train episode 2508: winner = 0, steps = 8\n",
      "02:10:29 [INFO] train episode 2509: winner = 1, steps = 6\n",
      "02:10:32 [INFO] train episode 2510: winner = 0, steps = 8\n",
      "02:10:34 [INFO] train episode 2511: winner = 0, steps = 8\n",
      "02:10:34 [INFO] train episode 2512: winner = 0, steps = 8\n",
      "02:10:38 [INFO] train episode 2513: winner = 0, steps = 8\n",
      "02:10:39 [INFO] train episode 2514: winner = 1, steps = 6\n",
      "02:10:48 [INFO] train episode 2515: winner = 1, steps = 6\n",
      "02:10:51 [INFO] train episode 2516: winner = 1, steps = 4\n",
      "02:10:54 [INFO] train episode 2517: winner = 1, steps = 6\n",
      "02:10:55 [INFO] train episode 2518: winner = 1, steps = 6\n",
      "02:10:56 [INFO] train episode 2519: winner = 0, steps = 8\n",
      "02:10:57 [INFO] train episode 2520: winner = 0, steps = 8\n",
      "02:11:01 [INFO] train episode 2521: winner = 0, steps = 8\n",
      "02:11:02 [INFO] train episode 2522: winner = 1, steps = 4\n",
      "02:11:07 [INFO] train episode 2523: winner = 0, steps = 8\n",
      "02:11:09 [INFO] train episode 2524: winner = 1, steps = 4\n",
      "02:11:13 [INFO] train episode 2525: winner = 1, steps = 6\n",
      "02:11:13 [INFO] train episode 2526: winner = 1, steps = 4\n",
      "02:11:16 [INFO] train episode 2527: winner = 1, steps = 4\n",
      "02:11:18 [INFO] train episode 2528: winner = 1, steps = 4\n",
      "02:11:21 [INFO] train episode 2529: winner = 1, steps = 4\n",
      "02:11:24 [INFO] train episode 2530: winner = 1, steps = 6\n",
      "02:11:26 [INFO] train episode 2531: winner = 1, steps = 6\n",
      "02:11:28 [INFO] train episode 2532: winner = 1, steps = 4\n",
      "02:11:29 [INFO] train episode 2533: winner = 1, steps = 4\n",
      "02:11:31 [INFO] train episode 2534: winner = 1, steps = 8\n",
      "02:11:33 [INFO] train episode 2535: winner = 1, steps = 4\n",
      "02:11:35 [INFO] train episode 2536: winner = 0, steps = 8\n",
      "02:11:35 [INFO] train episode 2537: winner = 0, steps = 8\n",
      "02:11:36 [INFO] train episode 2538: winner = 1, steps = 4\n",
      "02:11:36 [INFO] train episode 2539: winner = 1, steps = 4\n",
      "02:11:36 [INFO] train episode 2540: winner = 1, steps = 4\n",
      "02:11:37 [INFO] train episode 2541: winner = 1, steps = 4\n",
      "02:11:42 [INFO] train episode 2542: winner = -1, steps = 5\n",
      "02:11:43 [INFO] train episode 2543: winner = 1, steps = 6\n",
      "02:11:44 [INFO] train episode 2544: winner = 1, steps = 6\n",
      "02:11:47 [INFO] train episode 2545: winner = 0, steps = 8\n",
      "02:11:49 [INFO] train episode 2546: winner = 1, steps = 6\n",
      "02:11:49 [INFO] train episode 2547: winner = 0, steps = 8\n",
      "02:11:52 [INFO] train episode 2548: winner = 1, steps = 6\n",
      "02:11:56 [INFO] train episode 2549: winner = 1, steps = 4\n",
      "02:11:56 [INFO] train episode 2550: winner = 1, steps = 4\n",
      "02:11:58 [INFO] train episode 2551: winner = 1, steps = 4\n",
      "02:11:58 [INFO] train episode 2552: winner = 1, steps = 4\n",
      "02:12:00 [INFO] train episode 2553: winner = 1, steps = 4\n",
      "02:12:00 [INFO] train episode 2554: winner = 0, steps = 8\n",
      "02:12:03 [INFO] train episode 2555: winner = 0, steps = 8\n",
      "02:12:06 [INFO] train episode 2556: winner = 1, steps = 6\n",
      "02:12:06 [INFO] train episode 2557: winner = -1, steps = 5\n",
      "02:12:07 [INFO] train episode 2558: winner = 1, steps = 4\n",
      "02:12:08 [INFO] train episode 2559: winner = 0, steps = 8\n",
      "02:12:09 [INFO] train episode 2560: winner = 1, steps = 4\n",
      "02:12:13 [INFO] train episode 2561: winner = 0, steps = 8\n",
      "02:12:15 [INFO] train episode 2562: winner = 1, steps = 4\n",
      "02:12:17 [INFO] train episode 2563: winner = -1, steps = 5\n",
      "02:12:17 [INFO] train episode 2564: winner = 1, steps = 6\n",
      "02:12:22 [INFO] train episode 2565: winner = 0, steps = 8\n",
      "02:12:25 [INFO] train episode 2566: winner = -1, steps = 5\n",
      "02:12:27 [INFO] train episode 2567: winner = 1, steps = 4\n",
      "02:12:29 [INFO] train episode 2568: winner = 1, steps = 6\n",
      "02:12:33 [INFO] train episode 2569: winner = 1, steps = 6\n",
      "02:12:36 [INFO] train episode 2570: winner = 1, steps = 6\n",
      "02:12:38 [INFO] train episode 2571: winner = 1, steps = 6\n",
      "02:12:38 [INFO] train episode 2572: winner = 1, steps = 4\n",
      "02:12:40 [INFO] train episode 2573: winner = 1, steps = 4\n",
      "02:12:40 [INFO] train episode 2574: winner = 1, steps = 6\n",
      "02:12:43 [INFO] train episode 2575: winner = 0, steps = 8\n",
      "02:12:43 [INFO] train episode 2576: winner = 1, steps = 4\n",
      "02:12:46 [INFO] train episode 2577: winner = 1, steps = 6\n",
      "02:12:46 [INFO] train episode 2578: winner = 0, steps = 8\n",
      "02:12:47 [INFO] train episode 2579: winner = 0, steps = 8\n",
      "02:12:50 [INFO] train episode 2580: winner = -1, steps = 5\n",
      "02:12:50 [INFO] train episode 2581: winner = 1, steps = 4\n",
      "02:12:51 [INFO] train episode 2582: winner = 1, steps = 6\n",
      "02:12:53 [INFO] train episode 2583: winner = 1, steps = 6\n",
      "02:12:53 [INFO] train episode 2584: winner = 1, steps = 4\n",
      "02:12:54 [INFO] train episode 2585: winner = 1, steps = 6\n",
      "02:12:55 [INFO] train episode 2586: winner = -1, steps = 5\n",
      "02:12:57 [INFO] train episode 2587: winner = -1, steps = 5\n",
      "02:12:57 [INFO] train episode 2588: winner = 1, steps = 4\n",
      "02:12:59 [INFO] train episode 2589: winner = 1, steps = 6\n",
      "02:13:00 [INFO] train episode 2590: winner = 1, steps = 6\n",
      "02:13:00 [INFO] train episode 2591: winner = 1, steps = 6\n",
      "02:13:01 [INFO] train episode 2592: winner = 0, steps = 8\n",
      "02:13:04 [INFO] train episode 2593: winner = 1, steps = 6\n",
      "02:13:04 [INFO] train episode 2594: winner = 0, steps = 8\n",
      "02:13:05 [INFO] train episode 2595: winner = 1, steps = 6\n",
      "02:13:06 [INFO] train episode 2596: winner = 0, steps = 8\n",
      "02:13:07 [INFO] train episode 2597: winner = 0, steps = 8\n",
      "02:13:07 [INFO] train episode 2598: winner = 1, steps = 6\n",
      "02:13:07 [INFO] train episode 2599: winner = -1, steps = 5\n",
      "02:13:09 [INFO] train episode 2600: winner = 0, steps = 8\n",
      "02:13:09 [INFO] train episode 2601: winner = 1, steps = 6\n",
      "02:13:10 [INFO] train episode 2602: winner = -1, steps = 7\n",
      "02:13:10 [INFO] train episode 2603: winner = 0, steps = 8\n",
      "02:13:11 [INFO] train episode 2604: winner = 1, steps = 4\n",
      "02:13:12 [INFO] train episode 2605: winner = 1, steps = 4\n",
      "02:13:12 [INFO] train episode 2606: winner = 0, steps = 8\n",
      "02:13:12 [INFO] train episode 2607: winner = 0, steps = 8\n",
      "02:13:13 [INFO] train episode 2608: winner = 1, steps = 4\n",
      "02:13:13 [INFO] train episode 2609: winner = 1, steps = 6\n",
      "02:13:13 [INFO] train episode 2610: winner = 1, steps = 4\n",
      "02:13:15 [INFO] train episode 2611: winner = 1, steps = 6\n",
      "02:13:15 [INFO] train episode 2612: winner = 1, steps = 4\n",
      "02:13:18 [INFO] train episode 2613: winner = 0, steps = 8\n",
      "02:13:20 [INFO] train episode 2614: winner = 1, steps = 6\n",
      "02:13:22 [INFO] train episode 2615: winner = 1, steps = 4\n",
      "02:13:23 [INFO] train episode 2616: winner = 1, steps = 6\n",
      "02:13:26 [INFO] train episode 2617: winner = 1, steps = 6\n",
      "02:13:27 [INFO] train episode 2618: winner = 1, steps = 8\n",
      "02:13:29 [INFO] train episode 2619: winner = 0, steps = 8\n",
      "02:13:29 [INFO] train episode 2620: winner = 1, steps = 4\n",
      "02:13:30 [INFO] train episode 2621: winner = 1, steps = 6\n",
      "02:13:30 [INFO] train episode 2622: winner = 1, steps = 4\n",
      "02:13:32 [INFO] train episode 2623: winner = 1, steps = 4\n",
      "02:13:33 [INFO] train episode 2624: winner = 0, steps = 8\n",
      "02:13:34 [INFO] train episode 2625: winner = 1, steps = 6\n",
      "02:13:34 [INFO] train episode 2626: winner = 1, steps = 6\n",
      "02:13:34 [INFO] train episode 2627: winner = 0, steps = 8\n",
      "02:13:35 [INFO] train episode 2628: winner = 1, steps = 6\n",
      "02:13:35 [INFO] train episode 2629: winner = 0, steps = 8\n",
      "02:13:35 [INFO] train episode 2630: winner = 1, steps = 6\n",
      "02:13:35 [INFO] train episode 2631: winner = 1, steps = 4\n",
      "02:13:36 [INFO] train episode 2632: winner = 1, steps = 6\n",
      "02:13:38 [INFO] train episode 2633: winner = 1, steps = 4\n",
      "02:13:40 [INFO] train episode 2634: winner = 1, steps = 6\n",
      "02:13:40 [INFO] train episode 2635: winner = -1, steps = 5\n",
      "02:13:40 [INFO] train episode 2636: winner = 1, steps = 6\n",
      "02:13:41 [INFO] train episode 2637: winner = 1, steps = 4\n",
      "02:13:41 [INFO] test episode 2637:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:13:50 [INFO] step 0：player 1, action (0, 1)\n",
      "+o+\n",
      "+++\n",
      "+++\n",
      "02:13:57 [INFO] step 1：player -1, action (0, 0)\n",
      "xo+\n",
      "+++\n",
      "+++\n",
      "02:14:04 [INFO] step 2：player 1, action (1, 0)\n",
      "xo+\n",
      "o++\n",
      "+++\n",
      "02:14:07 [INFO] step 3：player -1, action (1, 1)\n",
      "xo+\n",
      "ox+\n",
      "+++\n",
      "02:14:08 [INFO] step 4：player 1, action (2, 1)\n",
      "xo+\n",
      "ox+\n",
      "+o+\n",
      "02:14:09 [INFO] step 5：player -1, action (2, 0)\n",
      "xo+\n",
      "ox+\n",
      "xo+\n",
      "02:14:09 [INFO] step 6：player 1, action (2, 2)\n",
      "xo+\n",
      "ox+\n",
      "xoo\n",
      "02:14:09 [INFO] step 7：player -1, action (0, 2)\n",
      "xox\n",
      "ox+\n",
      "xoo\n",
      "02:14:09 [INFO] test episode 2637: winner = -1, steps = 7\n",
      "02:14:26 [INFO] train episode 2638: winner = 0, steps = 8\n",
      "02:14:44 [INFO] train episode 2639: winner = 0, steps = 8\n",
      "02:14:45 [INFO] train episode 2640: winner = 1, steps = 6\n",
      "02:14:56 [INFO] train episode 2641: winner = 1, steps = 4\n",
      "02:15:10 [INFO] train episode 2642: winner = 0, steps = 8\n",
      "02:15:25 [INFO] train episode 2643: winner = 0, steps = 8\n",
      "02:15:33 [INFO] train episode 2644: winner = 1, steps = 6\n",
      "02:15:38 [INFO] train episode 2645: winner = 1, steps = 4\n",
      "02:15:44 [INFO] train episode 2646: winner = 0, steps = 8\n",
      "02:15:50 [INFO] train episode 2647: winner = 0, steps = 8\n",
      "02:15:54 [INFO] train episode 2648: winner = -1, steps = 5\n",
      "02:16:04 [INFO] train episode 2649: winner = 1, steps = 6\n",
      "02:16:06 [INFO] train episode 2650: winner = 1, steps = 6\n",
      "02:16:11 [INFO] train episode 2651: winner = -1, steps = 7\n",
      "02:16:15 [INFO] train episode 2652: winner = 1, steps = 4\n",
      "02:16:18 [INFO] train episode 2653: winner = 0, steps = 8\n",
      "02:16:21 [INFO] train episode 2654: winner = 1, steps = 4\n",
      "02:16:29 [INFO] train episode 2655: winner = 0, steps = 8\n",
      "02:16:34 [INFO] train episode 2656: winner = 1, steps = 6\n",
      "02:16:37 [INFO] train episode 2657: winner = 0, steps = 8\n",
      "02:16:40 [INFO] train episode 2658: winner = 1, steps = 6\n",
      "02:16:42 [INFO] train episode 2659: winner = 1, steps = 6\n",
      "02:16:57 [INFO] train episode 2660: winner = 0, steps = 8\n",
      "02:16:58 [INFO] train episode 2661: winner = 0, steps = 8\n",
      "02:17:04 [INFO] train episode 2662: winner = 0, steps = 8\n",
      "02:17:10 [INFO] train episode 2663: winner = 1, steps = 8\n",
      "02:17:15 [INFO] train episode 2664: winner = -1, steps = 5\n",
      "02:17:17 [INFO] train episode 2665: winner = -1, steps = 5\n",
      "02:17:17 [INFO] train episode 2666: winner = 1, steps = 4\n",
      "02:17:18 [INFO] train episode 2667: winner = -1, steps = 5\n",
      "02:17:23 [INFO] train episode 2668: winner = 0, steps = 8\n",
      "02:17:24 [INFO] train episode 2669: winner = 1, steps = 4\n",
      "02:17:25 [INFO] train episode 2670: winner = 0, steps = 8\n",
      "02:17:27 [INFO] train episode 2671: winner = 1, steps = 4\n",
      "02:17:30 [INFO] train episode 2672: winner = 1, steps = 8\n",
      "02:17:31 [INFO] train episode 2673: winner = -1, steps = 5\n",
      "02:17:33 [INFO] train episode 2674: winner = 1, steps = 4\n",
      "02:17:34 [INFO] train episode 2675: winner = 1, steps = 4\n",
      "02:17:34 [INFO] train episode 2676: winner = 1, steps = 4\n",
      "02:17:35 [INFO] train episode 2677: winner = 1, steps = 6\n",
      "02:17:36 [INFO] train episode 2678: winner = 1, steps = 6\n",
      "02:17:37 [INFO] train episode 2679: winner = 1, steps = 6\n",
      "02:17:38 [INFO] train episode 2680: winner = 1, steps = 6\n",
      "02:17:40 [INFO] train episode 2681: winner = 1, steps = 4\n",
      "02:17:43 [INFO] train episode 2682: winner = 1, steps = 6\n",
      "02:17:45 [INFO] train episode 2683: winner = 1, steps = 4\n",
      "02:17:49 [INFO] train episode 2684: winner = 0, steps = 8\n",
      "02:17:50 [INFO] train episode 2685: winner = 1, steps = 6\n",
      "02:17:51 [INFO] train episode 2686: winner = 1, steps = 6\n",
      "02:17:53 [INFO] train episode 2687: winner = 0, steps = 8\n",
      "02:17:53 [INFO] train episode 2688: winner = 0, steps = 8\n",
      "02:17:54 [INFO] train episode 2689: winner = 1, steps = 6\n",
      "02:17:56 [INFO] train episode 2690: winner = 1, steps = 6\n",
      "02:17:57 [INFO] train episode 2691: winner = 1, steps = 4\n",
      "02:17:57 [INFO] train episode 2692: winner = 1, steps = 6\n",
      "02:17:57 [INFO] train episode 2693: winner = 1, steps = 4\n",
      "02:18:00 [INFO] train episode 2694: winner = -1, steps = 5\n",
      "02:18:00 [INFO] train episode 2695: winner = 1, steps = 4\n",
      "02:18:02 [INFO] train episode 2696: winner = 1, steps = 4\n",
      "02:18:04 [INFO] train episode 2697: winner = 1, steps = 6\n",
      "02:18:05 [INFO] train episode 2698: winner = 0, steps = 8\n",
      "02:18:07 [INFO] train episode 2699: winner = -1, steps = 5\n",
      "02:18:10 [INFO] train episode 2700: winner = 1, steps = 6\n",
      "02:18:13 [INFO] train episode 2701: winner = 1, steps = 4\n",
      "02:18:17 [INFO] train episode 2702: winner = 0, steps = 8\n",
      "02:18:18 [INFO] train episode 2703: winner = 1, steps = 6\n",
      "02:18:18 [INFO] train episode 2704: winner = 0, steps = 8\n",
      "02:18:22 [INFO] train episode 2705: winner = 0, steps = 8\n",
      "02:18:25 [INFO] train episode 2706: winner = 0, steps = 8\n",
      "02:18:25 [INFO] train episode 2707: winner = 0, steps = 8\n",
      "02:18:26 [INFO] train episode 2708: winner = 1, steps = 4\n",
      "02:18:28 [INFO] train episode 2709: winner = 1, steps = 4\n",
      "02:18:32 [INFO] train episode 2710: winner = 0, steps = 8\n",
      "02:18:33 [INFO] train episode 2711: winner = 1, steps = 8\n",
      "02:18:33 [INFO] train episode 2712: winner = 1, steps = 4\n",
      "02:18:33 [INFO] train episode 2713: winner = 1, steps = 6\n",
      "02:18:33 [INFO] train episode 2714: winner = 1, steps = 4\n",
      "02:18:34 [INFO] train episode 2715: winner = 1, steps = 8\n",
      "02:18:34 [INFO] train episode 2716: winner = 0, steps = 8\n",
      "02:18:37 [INFO] train episode 2717: winner = 0, steps = 8\n",
      "02:18:38 [INFO] train episode 2718: winner = 0, steps = 8\n",
      "02:18:40 [INFO] train episode 2719: winner = 0, steps = 8\n",
      "02:18:42 [INFO] train episode 2720: winner = 0, steps = 8\n",
      "02:18:44 [INFO] train episode 2721: winner = 0, steps = 8\n",
      "02:18:44 [INFO] train episode 2722: winner = -1, steps = 5\n",
      "02:18:46 [INFO] train episode 2723: winner = 1, steps = 6\n",
      "02:18:48 [INFO] train episode 2724: winner = 0, steps = 8\n",
      "02:18:50 [INFO] train episode 2725: winner = 1, steps = 6\n",
      "02:18:50 [INFO] train episode 2726: winner = 1, steps = 6\n",
      "02:18:50 [INFO] train episode 2727: winner = 0, steps = 8\n",
      "02:18:50 [INFO] train episode 2728: winner = 1, steps = 6\n",
      "02:18:50 [INFO] train episode 2729: winner = 1, steps = 6\n",
      "02:18:52 [INFO] train episode 2730: winner = 0, steps = 8\n",
      "02:18:53 [INFO] train episode 2731: winner = 1, steps = 6\n",
      "02:18:55 [INFO] train episode 2732: winner = 1, steps = 4\n",
      "02:18:56 [INFO] train episode 2733: winner = 0, steps = 8\n",
      "02:19:01 [INFO] train episode 2734: winner = 0, steps = 8\n",
      "02:19:02 [INFO] train episode 2735: winner = 1, steps = 4\n",
      "02:19:03 [INFO] train episode 2736: winner = 1, steps = 4\n",
      "02:19:04 [INFO] train episode 2737: winner = 1, steps = 6\n",
      "02:19:05 [INFO] train episode 2738: winner = 0, steps = 8\n",
      "02:19:06 [INFO] train episode 2739: winner = -1, steps = 5\n",
      "02:19:07 [INFO] train episode 2740: winner = 1, steps = 6\n",
      "02:19:08 [INFO] train episode 2741: winner = 1, steps = 4\n",
      "02:19:10 [INFO] train episode 2742: winner = 0, steps = 8\n",
      "02:19:11 [INFO] train episode 2743: winner = 1, steps = 4\n",
      "02:19:11 [INFO] train episode 2744: winner = -1, steps = 5\n",
      "02:19:17 [INFO] train episode 2745: winner = 0, steps = 8\n",
      "02:19:18 [INFO] train episode 2746: winner = 0, steps = 8\n",
      "02:19:19 [INFO] train episode 2747: winner = 1, steps = 6\n",
      "02:19:21 [INFO] train episode 2748: winner = 0, steps = 8\n",
      "02:19:21 [INFO] train episode 2749: winner = 1, steps = 4\n",
      "02:19:23 [INFO] train episode 2750: winner = 1, steps = 4\n",
      "02:19:24 [INFO] train episode 2751: winner = 0, steps = 8\n",
      "02:19:24 [INFO] train episode 2752: winner = 1, steps = 4\n",
      "02:19:24 [INFO] train episode 2753: winner = 1, steps = 4\n",
      "02:19:24 [INFO] train episode 2754: winner = 0, steps = 8\n",
      "02:19:27 [INFO] train episode 2755: winner = 1, steps = 6\n",
      "02:19:28 [INFO] train episode 2756: winner = 1, steps = 8\n",
      "02:19:28 [INFO] train episode 2757: winner = 0, steps = 8\n",
      "02:19:28 [INFO] train episode 2758: winner = 1, steps = 4\n",
      "02:19:29 [INFO] train episode 2759: winner = 0, steps = 8\n",
      "02:19:29 [INFO] train episode 2760: winner = 1, steps = 6\n",
      "02:19:30 [INFO] train episode 2761: winner = 0, steps = 8\n",
      "02:19:31 [INFO] train episode 2762: winner = 1, steps = 6\n",
      "02:19:33 [INFO] train episode 2763: winner = 1, steps = 6\n",
      "02:19:36 [INFO] train episode 2764: winner = 1, steps = 6\n",
      "02:19:38 [INFO] train episode 2765: winner = 0, steps = 8\n",
      "02:19:40 [INFO] train episode 2766: winner = 1, steps = 6\n",
      "02:19:41 [INFO] train episode 2767: winner = 0, steps = 8\n",
      "02:19:43 [INFO] train episode 2768: winner = 1, steps = 4\n",
      "02:19:44 [INFO] train episode 2769: winner = 0, steps = 8\n",
      "02:19:46 [INFO] train episode 2770: winner = 0, steps = 8\n",
      "02:19:46 [INFO] train episode 2771: winner = 0, steps = 8\n",
      "02:19:47 [INFO] train episode 2772: winner = 0, steps = 8\n",
      "02:19:49 [INFO] train episode 2773: winner = 0, steps = 8\n",
      "02:19:50 [INFO] train episode 2774: winner = 1, steps = 6\n",
      "02:19:50 [INFO] test episode 2774:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:19:59 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "02:20:07 [INFO] step 1：player -1, action (2, 2)\n",
      "+++\n",
      "+o+\n",
      "++x\n",
      "02:20:14 [INFO] step 2：player 1, action (1, 2)\n",
      "+++\n",
      "+oo\n",
      "++x\n",
      "02:20:15 [INFO] step 3：player -1, action (2, 1)\n",
      "+++\n",
      "+oo\n",
      "+xx\n",
      "02:20:15 [INFO] step 4：player 1, action (1, 0)\n",
      "+++\n",
      "ooo\n",
      "+xx\n",
      "02:20:15 [INFO] test episode 2774: winner = 1, steps = 4\n",
      "02:20:23 [INFO] train episode 2775: winner = 0, steps = 8\n",
      "02:20:38 [INFO] train episode 2776: winner = -1, steps = 5\n",
      "02:20:48 [INFO] train episode 2777: winner = 1, steps = 6\n",
      "02:20:59 [INFO] train episode 2778: winner = 1, steps = 6\n",
      "02:21:17 [INFO] train episode 2779: winner = 0, steps = 8\n",
      "02:21:24 [INFO] train episode 2780: winner = 1, steps = 4\n",
      "02:21:29 [INFO] train episode 2781: winner = 0, steps = 8\n",
      "02:21:33 [INFO] train episode 2782: winner = 1, steps = 6\n",
      "02:21:35 [INFO] train episode 2783: winner = -1, steps = 5\n",
      "02:21:37 [INFO] train episode 2784: winner = -1, steps = 5\n",
      "02:21:47 [INFO] train episode 2785: winner = 1, steps = 4\n",
      "02:21:52 [INFO] train episode 2786: winner = -1, steps = 7\n",
      "02:21:56 [INFO] train episode 2787: winner = 1, steps = 6\n",
      "02:22:03 [INFO] train episode 2788: winner = 1, steps = 6\n",
      "02:22:11 [INFO] train episode 2789: winner = 0, steps = 8\n",
      "02:22:13 [INFO] train episode 2790: winner = 1, steps = 4\n",
      "02:22:18 [INFO] train episode 2791: winner = -1, steps = 5\n",
      "02:22:18 [INFO] train episode 2792: winner = 0, steps = 8\n",
      "02:22:19 [INFO] train episode 2793: winner = -1, steps = 5\n",
      "02:22:21 [INFO] train episode 2794: winner = 1, steps = 4\n",
      "02:22:25 [INFO] train episode 2795: winner = 0, steps = 8\n",
      "02:22:34 [INFO] train episode 2796: winner = 1, steps = 8\n",
      "02:22:35 [INFO] train episode 2797: winner = 1, steps = 6\n",
      "02:22:41 [INFO] train episode 2798: winner = -1, steps = 7\n",
      "02:22:41 [INFO] train episode 2799: winner = 1, steps = 4\n",
      "02:22:43 [INFO] train episode 2800: winner = 1, steps = 4\n",
      "02:22:47 [INFO] train episode 2801: winner = 0, steps = 8\n",
      "02:22:49 [INFO] train episode 2802: winner = 1, steps = 6\n",
      "02:22:50 [INFO] train episode 2803: winner = 1, steps = 4\n",
      "02:22:53 [INFO] train episode 2804: winner = 0, steps = 8\n",
      "02:23:00 [INFO] train episode 2805: winner = 0, steps = 8\n",
      "02:23:01 [INFO] train episode 2806: winner = -1, steps = 7\n",
      "02:23:09 [INFO] train episode 2807: winner = 0, steps = 8\n",
      "02:23:12 [INFO] train episode 2808: winner = 1, steps = 6\n",
      "02:23:15 [INFO] train episode 2809: winner = 0, steps = 8\n",
      "02:23:19 [INFO] train episode 2810: winner = 1, steps = 4\n",
      "02:23:21 [INFO] train episode 2811: winner = 1, steps = 4\n",
      "02:23:25 [INFO] train episode 2812: winner = 0, steps = 8\n",
      "02:23:27 [INFO] train episode 2813: winner = 1, steps = 4\n",
      "02:23:29 [INFO] train episode 2814: winner = 0, steps = 8\n",
      "02:23:30 [INFO] train episode 2815: winner = 1, steps = 4\n",
      "02:23:30 [INFO] train episode 2816: winner = 0, steps = 8\n",
      "02:23:31 [INFO] train episode 2817: winner = -1, steps = 5\n",
      "02:23:31 [INFO] train episode 2818: winner = 1, steps = 4\n",
      "02:23:34 [INFO] train episode 2819: winner = 1, steps = 6\n",
      "02:23:36 [INFO] train episode 2820: winner = 1, steps = 4\n",
      "02:23:42 [INFO] train episode 2821: winner = 0, steps = 8\n",
      "02:23:42 [INFO] train episode 2822: winner = 1, steps = 6\n",
      "02:23:44 [INFO] train episode 2823: winner = 1, steps = 6\n",
      "02:23:46 [INFO] train episode 2824: winner = 1, steps = 4\n",
      "02:23:46 [INFO] train episode 2825: winner = 1, steps = 6\n",
      "02:23:46 [INFO] train episode 2826: winner = 1, steps = 6\n",
      "02:23:47 [INFO] train episode 2827: winner = 1, steps = 6\n",
      "02:23:47 [INFO] train episode 2828: winner = 0, steps = 8\n",
      "02:23:51 [INFO] train episode 2829: winner = 0, steps = 8\n",
      "02:23:52 [INFO] train episode 2830: winner = 1, steps = 4\n",
      "02:23:59 [INFO] train episode 2831: winner = 0, steps = 8\n",
      "02:24:03 [INFO] train episode 2832: winner = 0, steps = 8\n",
      "02:24:07 [INFO] train episode 2833: winner = 0, steps = 8\n",
      "02:24:08 [INFO] train episode 2834: winner = 1, steps = 6\n",
      "02:24:11 [INFO] train episode 2835: winner = -1, steps = 7\n",
      "02:24:11 [INFO] train episode 2836: winner = 1, steps = 4\n",
      "02:24:11 [INFO] train episode 2837: winner = 1, steps = 6\n",
      "02:24:14 [INFO] train episode 2838: winner = -1, steps = 7\n",
      "02:24:16 [INFO] train episode 2839: winner = 1, steps = 6\n",
      "02:24:18 [INFO] train episode 2840: winner = 0, steps = 8\n",
      "02:24:18 [INFO] train episode 2841: winner = 1, steps = 6\n",
      "02:24:24 [INFO] train episode 2842: winner = -1, steps = 7\n",
      "02:24:27 [INFO] train episode 2843: winner = 0, steps = 8\n",
      "02:24:30 [INFO] train episode 2844: winner = 0, steps = 8\n",
      "02:24:32 [INFO] train episode 2845: winner = 1, steps = 6\n",
      "02:24:32 [INFO] train episode 2846: winner = 1, steps = 6\n",
      "02:24:32 [INFO] train episode 2847: winner = 1, steps = 6\n",
      "02:24:34 [INFO] train episode 2848: winner = -1, steps = 5\n",
      "02:24:34 [INFO] train episode 2849: winner = 1, steps = 6\n",
      "02:24:34 [INFO] train episode 2850: winner = -1, steps = 5\n",
      "02:24:36 [INFO] train episode 2851: winner = 0, steps = 8\n",
      "02:24:39 [INFO] train episode 2852: winner = 1, steps = 6\n",
      "02:24:39 [INFO] train episode 2853: winner = 1, steps = 4\n",
      "02:24:41 [INFO] train episode 2854: winner = 0, steps = 8\n",
      "02:24:43 [INFO] train episode 2855: winner = 0, steps = 8\n",
      "02:24:46 [INFO] train episode 2856: winner = 0, steps = 8\n",
      "02:24:49 [INFO] train episode 2857: winner = 1, steps = 4\n",
      "02:24:50 [INFO] train episode 2858: winner = 1, steps = 6\n",
      "02:24:50 [INFO] train episode 2859: winner = 1, steps = 6\n",
      "02:24:51 [INFO] train episode 2860: winner = 0, steps = 8\n",
      "02:24:51 [INFO] train episode 2861: winner = 1, steps = 6\n",
      "02:24:52 [INFO] train episode 2862: winner = 1, steps = 6\n",
      "02:24:52 [INFO] train episode 2863: winner = 0, steps = 8\n",
      "02:24:53 [INFO] train episode 2864: winner = 1, steps = 6\n",
      "02:24:54 [INFO] train episode 2865: winner = 0, steps = 8\n",
      "02:24:56 [INFO] train episode 2866: winner = 1, steps = 4\n",
      "02:24:56 [INFO] train episode 2867: winner = 1, steps = 4\n",
      "02:24:59 [INFO] train episode 2868: winner = 0, steps = 8\n",
      "02:24:59 [INFO] train episode 2869: winner = 1, steps = 6\n",
      "02:25:02 [INFO] train episode 2870: winner = 0, steps = 8\n",
      "02:25:02 [INFO] train episode 2871: winner = -1, steps = 5\n",
      "02:25:02 [INFO] train episode 2872: winner = 1, steps = 6\n",
      "02:25:05 [INFO] train episode 2873: winner = 1, steps = 6\n",
      "02:25:05 [INFO] train episode 2874: winner = 1, steps = 6\n",
      "02:25:05 [INFO] train episode 2875: winner = 0, steps = 8\n",
      "02:25:06 [INFO] train episode 2876: winner = 0, steps = 8\n",
      "02:25:09 [INFO] train episode 2877: winner = 1, steps = 6\n",
      "02:25:12 [INFO] train episode 2878: winner = 1, steps = 8\n",
      "02:25:13 [INFO] train episode 2879: winner = 0, steps = 8\n",
      "02:25:13 [INFO] train episode 2880: winner = 0, steps = 8\n",
      "02:25:14 [INFO] train episode 2881: winner = 0, steps = 8\n",
      "02:25:14 [INFO] train episode 2882: winner = 1, steps = 6\n",
      "02:25:19 [INFO] train episode 2883: winner = -1, steps = 7\n",
      "02:25:21 [INFO] train episode 2884: winner = 1, steps = 8\n",
      "02:25:22 [INFO] train episode 2885: winner = -1, steps = 7\n",
      "02:25:22 [INFO] train episode 2886: winner = 0, steps = 8\n",
      "02:25:24 [INFO] train episode 2887: winner = 1, steps = 4\n",
      "02:25:24 [INFO] train episode 2888: winner = 1, steps = 6\n",
      "02:25:24 [INFO] train episode 2889: winner = 0, steps = 8\n",
      "02:25:25 [INFO] train episode 2890: winner = 1, steps = 6\n",
      "02:25:26 [INFO] train episode 2891: winner = 1, steps = 6\n",
      "02:25:26 [INFO] train episode 2892: winner = 0, steps = 8\n",
      "02:25:26 [INFO] train episode 2893: winner = -1, steps = 7\n",
      "02:25:27 [INFO] train episode 2894: winner = 1, steps = 4\n",
      "02:25:31 [INFO] train episode 2895: winner = 1, steps = 6\n",
      "02:25:32 [INFO] train episode 2896: winner = -1, steps = 7\n",
      "02:25:35 [INFO] train episode 2897: winner = 0, steps = 8\n",
      "02:25:35 [INFO] train episode 2898: winner = 1, steps = 4\n",
      "02:25:36 [INFO] train episode 2899: winner = 1, steps = 6\n",
      "02:25:38 [INFO] train episode 2900: winner = 0, steps = 8\n",
      "02:25:42 [INFO] train episode 2901: winner = 0, steps = 8\n",
      "02:25:42 [INFO] train episode 2902: winner = 1, steps = 4\n",
      "02:25:42 [INFO] train episode 2903: winner = 1, steps = 4\n",
      "02:25:43 [INFO] train episode 2904: winner = 0, steps = 8\n",
      "02:25:43 [INFO] train episode 2905: winner = 1, steps = 4\n",
      "02:25:46 [INFO] train episode 2906: winner = -1, steps = 5\n",
      "02:25:47 [INFO] train episode 2907: winner = 0, steps = 8\n",
      "02:25:50 [INFO] train episode 2908: winner = 0, steps = 8\n",
      "02:25:51 [INFO] train episode 2909: winner = 0, steps = 8\n",
      "02:25:54 [INFO] train episode 2910: winner = 0, steps = 8\n",
      "02:25:54 [INFO] test episode 2910:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:26:03 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "02:26:12 [INFO] step 1：player -1, action (0, 1)\n",
      "+x+\n",
      "+o+\n",
      "+++\n",
      "02:26:16 [INFO] step 2：player 1, action (0, 0)\n",
      "ox+\n",
      "+o+\n",
      "+++\n",
      "02:26:17 [INFO] step 3：player -1, action (2, 2)\n",
      "ox+\n",
      "+o+\n",
      "++x\n",
      "02:26:18 [INFO] step 4：player 1, action (2, 0)\n",
      "ox+\n",
      "+o+\n",
      "o+x\n",
      "02:26:18 [INFO] step 5：player -1, action (0, 2)\n",
      "oxx\n",
      "+o+\n",
      "o+x\n",
      "02:26:18 [INFO] step 6：player 1, action (1, 0)\n",
      "oxx\n",
      "oo+\n",
      "o+x\n",
      "02:26:18 [INFO] test episode 2910: winner = 1, steps = 6\n",
      "02:26:34 [INFO] train episode 2911: winner = 0, steps = 8\n",
      "02:26:47 [INFO] train episode 2912: winner = 1, steps = 4\n",
      "02:26:58 [INFO] train episode 2913: winner = 1, steps = 6\n",
      "02:27:09 [INFO] train episode 2914: winner = 1, steps = 4\n",
      "02:27:12 [INFO] train episode 2915: winner = 1, steps = 4\n",
      "02:27:29 [INFO] train episode 2916: winner = -1, steps = 7\n",
      "02:27:31 [INFO] train episode 2917: winner = 1, steps = 6\n",
      "02:27:36 [INFO] train episode 2918: winner = 0, steps = 8\n",
      "02:27:49 [INFO] train episode 2919: winner = 0, steps = 8\n",
      "02:27:50 [INFO] train episode 2920: winner = 0, steps = 8\n",
      "02:27:56 [INFO] train episode 2921: winner = -1, steps = 7\n",
      "02:27:56 [INFO] train episode 2922: winner = 0, steps = 8\n",
      "02:28:00 [INFO] train episode 2923: winner = 1, steps = 6\n",
      "02:28:03 [INFO] train episode 2924: winner = 0, steps = 8\n",
      "02:28:05 [INFO] train episode 2925: winner = 0, steps = 8\n",
      "02:28:13 [INFO] train episode 2926: winner = 0, steps = 8\n",
      "02:28:20 [INFO] train episode 2927: winner = 0, steps = 8\n",
      "02:28:23 [INFO] train episode 2928: winner = 1, steps = 6\n",
      "02:28:25 [INFO] train episode 2929: winner = 0, steps = 8\n",
      "02:28:34 [INFO] train episode 2930: winner = 0, steps = 8\n",
      "02:28:41 [INFO] train episode 2931: winner = -1, steps = 7\n",
      "02:28:45 [INFO] train episode 2932: winner = 1, steps = 4\n",
      "02:28:48 [INFO] train episode 2933: winner = 0, steps = 8\n",
      "02:28:53 [INFO] train episode 2934: winner = 0, steps = 8\n",
      "02:28:58 [INFO] train episode 2935: winner = -1, steps = 7\n",
      "02:29:03 [INFO] train episode 2936: winner = 0, steps = 8\n",
      "02:29:04 [INFO] train episode 2937: winner = 1, steps = 4\n",
      "02:29:14 [INFO] train episode 2938: winner = 0, steps = 8\n",
      "02:29:19 [INFO] train episode 2939: winner = 0, steps = 8\n",
      "02:29:19 [INFO] train episode 2940: winner = 0, steps = 8\n",
      "02:29:22 [INFO] train episode 2941: winner = 1, steps = 6\n",
      "02:29:31 [INFO] train episode 2942: winner = 0, steps = 8\n",
      "02:29:34 [INFO] train episode 2943: winner = 1, steps = 6\n",
      "02:29:35 [INFO] train episode 2944: winner = 1, steps = 6\n",
      "02:29:38 [INFO] train episode 2945: winner = 1, steps = 6\n",
      "02:29:45 [INFO] train episode 2946: winner = 0, steps = 8\n",
      "02:29:52 [INFO] train episode 2947: winner = 0, steps = 8\n",
      "02:29:52 [INFO] train episode 2948: winner = 1, steps = 4\n",
      "02:29:54 [INFO] train episode 2949: winner = 0, steps = 8\n",
      "02:29:54 [INFO] train episode 2950: winner = 0, steps = 8\n",
      "02:29:55 [INFO] train episode 2951: winner = 1, steps = 6\n",
      "02:29:56 [INFO] train episode 2952: winner = 1, steps = 8\n",
      "02:29:59 [INFO] train episode 2953: winner = 1, steps = 4\n",
      "02:30:01 [INFO] train episode 2954: winner = 1, steps = 8\n",
      "02:30:09 [INFO] train episode 2955: winner = 1, steps = 6\n",
      "02:30:09 [INFO] train episode 2956: winner = 0, steps = 8\n",
      "02:30:12 [INFO] train episode 2957: winner = 1, steps = 6\n",
      "02:30:17 [INFO] train episode 2958: winner = 1, steps = 6\n",
      "02:30:19 [INFO] train episode 2959: winner = 1, steps = 4\n",
      "02:30:21 [INFO] train episode 2960: winner = 0, steps = 8\n",
      "02:30:22 [INFO] train episode 2961: winner = -1, steps = 7\n",
      "02:30:22 [INFO] train episode 2962: winner = 1, steps = 4\n",
      "02:30:22 [INFO] train episode 2963: winner = 0, steps = 8\n",
      "02:30:26 [INFO] train episode 2964: winner = 0, steps = 8\n",
      "02:30:29 [INFO] train episode 2965: winner = -1, steps = 7\n",
      "02:30:30 [INFO] train episode 2966: winner = 1, steps = 8\n",
      "02:30:33 [INFO] train episode 2967: winner = 0, steps = 8\n",
      "02:30:34 [INFO] train episode 2968: winner = 1, steps = 4\n",
      "02:30:37 [INFO] train episode 2969: winner = 1, steps = 6\n",
      "02:30:41 [INFO] train episode 2970: winner = -1, steps = 5\n",
      "02:30:45 [INFO] train episode 2971: winner = 0, steps = 8\n",
      "02:30:48 [INFO] train episode 2972: winner = 0, steps = 8\n",
      "02:30:49 [INFO] train episode 2973: winner = 1, steps = 6\n",
      "02:30:50 [INFO] train episode 2974: winner = 1, steps = 6\n",
      "02:30:50 [INFO] train episode 2975: winner = -1, steps = 7\n",
      "02:30:51 [INFO] train episode 2976: winner = 1, steps = 4\n",
      "02:30:53 [INFO] train episode 2977: winner = 1, steps = 4\n",
      "02:30:54 [INFO] train episode 2978: winner = 1, steps = 6\n",
      "02:30:54 [INFO] train episode 2979: winner = 1, steps = 6\n",
      "02:30:56 [INFO] train episode 2980: winner = 1, steps = 4\n",
      "02:30:58 [INFO] train episode 2981: winner = 0, steps = 8\n",
      "02:30:59 [INFO] train episode 2982: winner = 1, steps = 4\n",
      "02:30:59 [INFO] train episode 2983: winner = 1, steps = 4\n",
      "02:30:59 [INFO] train episode 2984: winner = 0, steps = 8\n",
      "02:30:59 [INFO] train episode 2985: winner = 0, steps = 8\n",
      "02:30:59 [INFO] train episode 2986: winner = -1, steps = 7\n",
      "02:31:00 [INFO] train episode 2987: winner = 0, steps = 8\n",
      "02:31:01 [INFO] train episode 2988: winner = 1, steps = 8\n",
      "02:31:06 [INFO] train episode 2989: winner = 0, steps = 8\n",
      "02:31:07 [INFO] train episode 2990: winner = 0, steps = 8\n",
      "02:31:07 [INFO] train episode 2991: winner = 1, steps = 6\n",
      "02:31:08 [INFO] train episode 2992: winner = 1, steps = 4\n",
      "02:31:09 [INFO] train episode 2993: winner = 0, steps = 8\n",
      "02:31:10 [INFO] train episode 2994: winner = 0, steps = 8\n",
      "02:31:13 [INFO] train episode 2995: winner = 0, steps = 8\n",
      "02:31:13 [INFO] train episode 2996: winner = 1, steps = 6\n",
      "02:31:13 [INFO] train episode 2997: winner = -1, steps = 7\n",
      "02:31:15 [INFO] train episode 2998: winner = 1, steps = 4\n",
      "02:31:17 [INFO] train episode 2999: winner = -1, steps = 5\n",
      "02:31:17 [INFO] train episode 3000: winner = 0, steps = 8\n",
      "02:31:18 [INFO] train episode 3001: winner = 0, steps = 8\n",
      "02:31:20 [INFO] train episode 3002: winner = 1, steps = 4\n",
      "02:31:21 [INFO] train episode 3003: winner = 1, steps = 6\n",
      "02:31:24 [INFO] train episode 3004: winner = 1, steps = 6\n",
      "02:31:25 [INFO] train episode 3005: winner = 1, steps = 4\n",
      "02:31:26 [INFO] train episode 3006: winner = -1, steps = 7\n",
      "02:31:28 [INFO] train episode 3007: winner = 0, steps = 8\n",
      "02:31:28 [INFO] train episode 3008: winner = 0, steps = 8\n",
      "02:31:28 [INFO] train episode 3009: winner = 1, steps = 4\n",
      "02:31:30 [INFO] train episode 3010: winner = 1, steps = 6\n",
      "02:31:31 [INFO] train episode 3011: winner = -1, steps = 5\n",
      "02:31:31 [INFO] train episode 3012: winner = 1, steps = 6\n",
      "02:31:31 [INFO] train episode 3013: winner = 1, steps = 6\n",
      "02:31:32 [INFO] train episode 3014: winner = 1, steps = 8\n",
      "02:31:34 [INFO] train episode 3015: winner = -1, steps = 7\n",
      "02:31:36 [INFO] train episode 3016: winner = 1, steps = 6\n",
      "02:31:36 [INFO] train episode 3017: winner = 1, steps = 4\n",
      "02:31:37 [INFO] train episode 3018: winner = 1, steps = 6\n",
      "02:31:38 [INFO] train episode 3019: winner = 1, steps = 6\n",
      "02:31:38 [INFO] train episode 3020: winner = 0, steps = 8\n",
      "02:31:38 [INFO] train episode 3021: winner = 0, steps = 8\n",
      "02:31:38 [INFO] train episode 3022: winner = 1, steps = 4\n",
      "02:31:40 [INFO] train episode 3023: winner = 0, steps = 8\n",
      "02:31:42 [INFO] train episode 3024: winner = 0, steps = 8\n",
      "02:31:45 [INFO] train episode 3025: winner = 0, steps = 8\n",
      "02:31:46 [INFO] train episode 3026: winner = 0, steps = 8\n",
      "02:31:48 [INFO] train episode 3027: winner = 1, steps = 6\n",
      "02:31:50 [INFO] train episode 3028: winner = 0, steps = 8\n",
      "02:31:52 [INFO] train episode 3029: winner = -1, steps = 5\n",
      "02:31:52 [INFO] train episode 3030: winner = 0, steps = 8\n",
      "02:31:55 [INFO] train episode 3031: winner = -1, steps = 7\n",
      "02:31:58 [INFO] train episode 3032: winner = 0, steps = 8\n",
      "02:32:02 [INFO] train episode 3033: winner = 0, steps = 8\n",
      "02:32:02 [INFO] train episode 3034: winner = 0, steps = 8\n",
      "02:32:03 [INFO] train episode 3035: winner = -1, steps = 5\n",
      "02:32:07 [INFO] train episode 3036: winner = 0, steps = 8\n",
      "02:32:08 [INFO] train episode 3037: winner = 1, steps = 6\n",
      "02:32:11 [INFO] train episode 3038: winner = 1, steps = 4\n",
      "02:32:12 [INFO] train episode 3039: winner = 0, steps = 8\n",
      "02:32:12 [INFO] train episode 3040: winner = 0, steps = 8\n",
      "02:32:13 [INFO] train episode 3041: winner = 0, steps = 8\n",
      "02:32:13 [INFO] test episode 3041:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:32:23 [INFO] step 0：player 1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "++o\n",
      "02:32:30 [INFO] step 1：player -1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+xo\n",
      "02:32:34 [INFO] step 2：player 1, action (0, 2)\n",
      "++o\n",
      "+++\n",
      "+xo\n",
      "02:32:34 [INFO] step 3：player -1, action (2, 0)\n",
      "++o\n",
      "+++\n",
      "xxo\n",
      "02:32:35 [INFO] step 4：player 1, action (1, 2)\n",
      "++o\n",
      "++o\n",
      "xxo\n",
      "02:32:35 [INFO] test episode 3041: winner = 1, steps = 4\n",
      "02:32:49 [INFO] train episode 3042: winner = 0, steps = 8\n",
      "02:32:52 [INFO] train episode 3043: winner = 1, steps = 4\n",
      "02:33:01 [INFO] train episode 3044: winner = -1, steps = 5\n",
      "02:33:16 [INFO] train episode 3045: winner = -1, steps = 5\n",
      "02:33:30 [INFO] train episode 3046: winner = 1, steps = 6\n",
      "02:33:31 [INFO] train episode 3047: winner = -1, steps = 5\n",
      "02:33:37 [INFO] train episode 3048: winner = 1, steps = 6\n",
      "02:33:48 [INFO] train episode 3049: winner = 0, steps = 8\n",
      "02:33:59 [INFO] train episode 3050: winner = -1, steps = 5\n",
      "02:34:02 [INFO] train episode 3051: winner = 1, steps = 6\n",
      "02:34:06 [INFO] train episode 3052: winner = 1, steps = 6\n",
      "02:34:20 [INFO] train episode 3053: winner = 0, steps = 8\n",
      "02:34:28 [INFO] train episode 3054: winner = 0, steps = 8\n",
      "02:34:32 [INFO] train episode 3055: winner = 1, steps = 6\n",
      "02:34:33 [INFO] train episode 3056: winner = 1, steps = 6\n",
      "02:34:35 [INFO] train episode 3057: winner = 0, steps = 8\n",
      "02:34:38 [INFO] train episode 3058: winner = 0, steps = 8\n",
      "02:34:43 [INFO] train episode 3059: winner = -1, steps = 7\n",
      "02:34:46 [INFO] train episode 3060: winner = 1, steps = 6\n",
      "02:34:48 [INFO] train episode 3061: winner = -1, steps = 5\n",
      "02:34:50 [INFO] train episode 3062: winner = 1, steps = 4\n",
      "02:34:50 [INFO] train episode 3063: winner = 1, steps = 6\n",
      "02:34:53 [INFO] train episode 3064: winner = 1, steps = 6\n",
      "02:34:54 [INFO] train episode 3065: winner = 1, steps = 4\n",
      "02:34:57 [INFO] train episode 3066: winner = 1, steps = 6\n",
      "02:35:01 [INFO] train episode 3067: winner = 1, steps = 8\n",
      "02:35:02 [INFO] train episode 3068: winner = 0, steps = 8\n",
      "02:35:04 [INFO] train episode 3069: winner = 1, steps = 6\n",
      "02:35:05 [INFO] train episode 3070: winner = 1, steps = 4\n",
      "02:35:07 [INFO] train episode 3071: winner = 1, steps = 6\n",
      "02:35:08 [INFO] train episode 3072: winner = 1, steps = 6\n",
      "02:35:09 [INFO] train episode 3073: winner = 0, steps = 8\n",
      "02:35:16 [INFO] train episode 3074: winner = 1, steps = 6\n",
      "02:35:16 [INFO] train episode 3075: winner = 1, steps = 4\n",
      "02:35:19 [INFO] train episode 3076: winner = 0, steps = 8\n",
      "02:35:23 [INFO] train episode 3077: winner = 0, steps = 8\n",
      "02:35:25 [INFO] train episode 3078: winner = 1, steps = 6\n",
      "02:35:27 [INFO] train episode 3079: winner = 1, steps = 6\n",
      "02:35:28 [INFO] train episode 3080: winner = 1, steps = 6\n",
      "02:35:28 [INFO] train episode 3081: winner = 0, steps = 8\n",
      "02:35:28 [INFO] train episode 3082: winner = 1, steps = 6\n",
      "02:35:31 [INFO] train episode 3083: winner = 0, steps = 8\n",
      "02:35:34 [INFO] train episode 3084: winner = 1, steps = 4\n",
      "02:35:35 [INFO] train episode 3085: winner = 1, steps = 4\n",
      "02:35:35 [INFO] train episode 3086: winner = 0, steps = 8\n",
      "02:35:45 [INFO] train episode 3087: winner = -1, steps = 7\n",
      "02:35:46 [INFO] train episode 3088: winner = 1, steps = 4\n",
      "02:35:46 [INFO] train episode 3089: winner = 0, steps = 8\n",
      "02:35:47 [INFO] train episode 3090: winner = -1, steps = 5\n",
      "02:35:50 [INFO] train episode 3091: winner = 0, steps = 8\n",
      "02:35:55 [INFO] train episode 3092: winner = 1, steps = 8\n",
      "02:35:58 [INFO] train episode 3093: winner = 1, steps = 6\n",
      "02:36:03 [INFO] train episode 3094: winner = 0, steps = 8\n",
      "02:36:05 [INFO] train episode 3095: winner = -1, steps = 5\n",
      "02:36:08 [INFO] train episode 3096: winner = 1, steps = 8\n",
      "02:36:08 [INFO] train episode 3097: winner = 1, steps = 4\n",
      "02:36:11 [INFO] train episode 3098: winner = 1, steps = 4\n",
      "02:36:11 [INFO] train episode 3099: winner = 1, steps = 4\n",
      "02:36:11 [INFO] train episode 3100: winner = 1, steps = 6\n",
      "02:36:17 [INFO] train episode 3101: winner = 1, steps = 6\n",
      "02:36:17 [INFO] train episode 3102: winner = 0, steps = 8\n",
      "02:36:22 [INFO] train episode 3103: winner = 0, steps = 8\n",
      "02:36:25 [INFO] train episode 3104: winner = -1, steps = 7\n",
      "02:36:27 [INFO] train episode 3105: winner = -1, steps = 5\n",
      "02:36:28 [INFO] train episode 3106: winner = 0, steps = 8\n",
      "02:36:29 [INFO] train episode 3107: winner = -1, steps = 5\n",
      "02:36:29 [INFO] train episode 3108: winner = 0, steps = 8\n",
      "02:36:32 [INFO] train episode 3109: winner = 1, steps = 6\n",
      "02:36:34 [INFO] train episode 3110: winner = 0, steps = 8\n",
      "02:36:37 [INFO] train episode 3111: winner = 0, steps = 8\n",
      "02:36:37 [INFO] train episode 3112: winner = 1, steps = 4\n",
      "02:36:41 [INFO] train episode 3113: winner = 0, steps = 8\n",
      "02:36:41 [INFO] train episode 3114: winner = 1, steps = 4\n",
      "02:36:45 [INFO] train episode 3115: winner = 1, steps = 6\n",
      "02:36:45 [INFO] train episode 3116: winner = 0, steps = 8\n",
      "02:36:46 [INFO] train episode 3117: winner = 0, steps = 8\n",
      "02:36:46 [INFO] train episode 3118: winner = 0, steps = 8\n",
      "02:36:47 [INFO] train episode 3119: winner = -1, steps = 5\n",
      "02:36:48 [INFO] train episode 3120: winner = 1, steps = 6\n",
      "02:36:48 [INFO] train episode 3121: winner = 1, steps = 4\n",
      "02:36:48 [INFO] train episode 3122: winner = 0, steps = 8\n",
      "02:36:49 [INFO] train episode 3123: winner = 1, steps = 6\n",
      "02:36:50 [INFO] train episode 3124: winner = 1, steps = 4\n",
      "02:36:53 [INFO] train episode 3125: winner = 0, steps = 8\n",
      "02:36:54 [INFO] train episode 3126: winner = 1, steps = 4\n",
      "02:36:56 [INFO] train episode 3127: winner = 1, steps = 6\n",
      "02:37:04 [INFO] train episode 3128: winner = 0, steps = 8\n",
      "02:37:05 [INFO] train episode 3129: winner = 1, steps = 6\n",
      "02:37:05 [INFO] train episode 3130: winner = 0, steps = 8\n",
      "02:37:10 [INFO] train episode 3131: winner = 0, steps = 8\n",
      "02:37:12 [INFO] train episode 3132: winner = 0, steps = 8\n",
      "02:37:15 [INFO] train episode 3133: winner = 1, steps = 6\n",
      "02:37:16 [INFO] train episode 3134: winner = 1, steps = 6\n",
      "02:37:19 [INFO] train episode 3135: winner = -1, steps = 7\n",
      "02:37:19 [INFO] train episode 3136: winner = 0, steps = 8\n",
      "02:37:20 [INFO] train episode 3137: winner = 1, steps = 6\n",
      "02:37:21 [INFO] train episode 3138: winner = 0, steps = 8\n",
      "02:37:22 [INFO] train episode 3139: winner = 1, steps = 6\n",
      "02:37:23 [INFO] train episode 3140: winner = 0, steps = 8\n",
      "02:37:25 [INFO] train episode 3141: winner = 1, steps = 4\n",
      "02:37:26 [INFO] train episode 3142: winner = 0, steps = 8\n",
      "02:37:27 [INFO] train episode 3143: winner = 1, steps = 6\n",
      "02:37:29 [INFO] train episode 3144: winner = -1, steps = 5\n",
      "02:37:29 [INFO] train episode 3145: winner = 1, steps = 4\n",
      "02:37:29 [INFO] train episode 3146: winner = 1, steps = 4\n",
      "02:37:31 [INFO] train episode 3147: winner = 1, steps = 6\n",
      "02:37:32 [INFO] train episode 3148: winner = 0, steps = 8\n",
      "02:37:36 [INFO] train episode 3149: winner = -1, steps = 5\n",
      "02:37:37 [INFO] train episode 3150: winner = -1, steps = 5\n",
      "02:37:40 [INFO] train episode 3151: winner = 0, steps = 8\n",
      "02:37:41 [INFO] train episode 3152: winner = 1, steps = 6\n",
      "02:37:42 [INFO] train episode 3153: winner = 1, steps = 6\n",
      "02:37:42 [INFO] train episode 3154: winner = -1, steps = 5\n",
      "02:37:43 [INFO] train episode 3155: winner = 0, steps = 8\n",
      "02:37:44 [INFO] train episode 3156: winner = 0, steps = 8\n",
      "02:37:44 [INFO] train episode 3157: winner = 1, steps = 6\n",
      "02:37:44 [INFO] train episode 3158: winner = 1, steps = 4\n",
      "02:37:46 [INFO] train episode 3159: winner = 1, steps = 4\n",
      "02:37:46 [INFO] train episode 3160: winner = 1, steps = 6\n",
      "02:37:47 [INFO] train episode 3161: winner = 1, steps = 4\n",
      "02:37:47 [INFO] train episode 3162: winner = 0, steps = 8\n",
      "02:37:48 [INFO] train episode 3163: winner = -1, steps = 5\n",
      "02:37:50 [INFO] train episode 3164: winner = 0, steps = 8\n",
      "02:37:51 [INFO] train episode 3165: winner = 1, steps = 6\n",
      "02:37:53 [INFO] train episode 3166: winner = 0, steps = 8\n",
      "02:37:56 [INFO] train episode 3167: winner = -1, steps = 5\n",
      "02:37:57 [INFO] train episode 3168: winner = 0, steps = 8\n",
      "02:37:57 [INFO] train episode 3169: winner = 0, steps = 8\n",
      "02:37:58 [INFO] train episode 3170: winner = 0, steps = 8\n",
      "02:37:59 [INFO] train episode 3171: winner = 1, steps = 6\n",
      "02:38:02 [INFO] train episode 3172: winner = 1, steps = 6\n",
      "02:38:02 [INFO] train episode 3173: winner = 0, steps = 8\n",
      "02:38:04 [INFO] train episode 3174: winner = 1, steps = 6\n",
      "02:38:06 [INFO] train episode 3175: winner = 0, steps = 8\n",
      "02:38:08 [INFO] train episode 3176: winner = 0, steps = 8\n",
      "02:38:10 [INFO] train episode 3177: winner = 1, steps = 6\n",
      "02:38:10 [INFO] test episode 3177:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:38:19 [INFO] step 0：player 1, action (1, 0)\n",
      "+++\n",
      "o++\n",
      "+++\n",
      "02:38:27 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "o++\n",
      "+++\n",
      "02:38:32 [INFO] step 2：player 1, action (0, 1)\n",
      "+ox\n",
      "o++\n",
      "+++\n",
      "02:38:35 [INFO] step 3：player -1, action (2, 0)\n",
      "+ox\n",
      "o++\n",
      "x++\n",
      "02:38:36 [INFO] step 4：player 1, action (1, 1)\n",
      "+ox\n",
      "oo+\n",
      "x++\n",
      "02:38:36 [INFO] step 5：player -1, action (1, 2)\n",
      "+ox\n",
      "oox\n",
      "x++\n",
      "02:38:37 [INFO] step 6：player 1, action (2, 2)\n",
      "+ox\n",
      "oox\n",
      "x+o\n",
      "02:38:37 [INFO] step 7：player -1, action (2, 1)\n",
      "+ox\n",
      "oox\n",
      "xxo\n",
      "02:38:37 [INFO] step 8：player 1, action (0, 0)\n",
      "oox\n",
      "oox\n",
      "xxo\n",
      "02:38:37 [INFO] test episode 3177: winner = 1, steps = 8\n",
      "02:38:53 [INFO] train episode 3178: winner = -1, steps = 5\n",
      "02:39:06 [INFO] train episode 3179: winner = 1, steps = 6\n",
      "02:39:20 [INFO] train episode 3180: winner = 1, steps = 6\n",
      "02:39:30 [INFO] train episode 3181: winner = 1, steps = 6\n",
      "02:39:33 [INFO] train episode 3182: winner = 1, steps = 4\n",
      "02:39:38 [INFO] train episode 3183: winner = 1, steps = 6\n",
      "02:39:40 [INFO] train episode 3184: winner = 1, steps = 4\n",
      "02:39:49 [INFO] train episode 3185: winner = 0, steps = 8\n",
      "02:39:50 [INFO] train episode 3186: winner = 1, steps = 4\n",
      "02:39:58 [INFO] train episode 3187: winner = 1, steps = 4\n",
      "02:40:06 [INFO] train episode 3188: winner = 1, steps = 6\n",
      "02:40:06 [INFO] train episode 3189: winner = 1, steps = 4\n",
      "02:40:11 [INFO] train episode 3190: winner = 0, steps = 8\n",
      "02:40:13 [INFO] train episode 3191: winner = 1, steps = 4\n",
      "02:40:23 [INFO] train episode 3192: winner = 0, steps = 8\n",
      "02:40:27 [INFO] train episode 3193: winner = 1, steps = 6\n",
      "02:40:30 [INFO] train episode 3194: winner = 1, steps = 6\n",
      "02:40:33 [INFO] train episode 3195: winner = -1, steps = 5\n",
      "02:40:38 [INFO] train episode 3196: winner = 0, steps = 8\n",
      "02:40:40 [INFO] train episode 3197: winner = -1, steps = 7\n",
      "02:40:41 [INFO] train episode 3198: winner = 1, steps = 4\n",
      "02:40:43 [INFO] train episode 3199: winner = 1, steps = 6\n",
      "02:40:46 [INFO] train episode 3200: winner = 1, steps = 6\n",
      "02:40:49 [INFO] train episode 3201: winner = 1, steps = 4\n",
      "02:40:58 [INFO] train episode 3202: winner = 0, steps = 8\n",
      "02:41:01 [INFO] train episode 3203: winner = 1, steps = 6\n",
      "02:41:07 [INFO] train episode 3204: winner = 0, steps = 8\n",
      "02:41:07 [INFO] train episode 3205: winner = 1, steps = 6\n",
      "02:41:09 [INFO] train episode 3206: winner = 1, steps = 6\n",
      "02:41:15 [INFO] train episode 3207: winner = 1, steps = 8\n",
      "02:41:19 [INFO] train episode 3208: winner = 1, steps = 6\n",
      "02:41:24 [INFO] train episode 3209: winner = 0, steps = 8\n",
      "02:41:28 [INFO] train episode 3210: winner = 0, steps = 8\n",
      "02:41:32 [INFO] train episode 3211: winner = 0, steps = 8\n",
      "02:41:33 [INFO] train episode 3212: winner = 1, steps = 6\n",
      "02:41:35 [INFO] train episode 3213: winner = 1, steps = 6\n",
      "02:41:35 [INFO] train episode 3214: winner = 0, steps = 8\n",
      "02:41:36 [INFO] train episode 3215: winner = 0, steps = 8\n",
      "02:41:47 [INFO] train episode 3216: winner = 0, steps = 8\n",
      "02:41:47 [INFO] train episode 3217: winner = 1, steps = 4\n",
      "02:41:49 [INFO] train episode 3218: winner = 1, steps = 6\n",
      "02:41:51 [INFO] train episode 3219: winner = 0, steps = 8\n",
      "02:41:53 [INFO] train episode 3220: winner = 1, steps = 6\n",
      "02:41:55 [INFO] train episode 3221: winner = 0, steps = 8\n",
      "02:41:56 [INFO] train episode 3222: winner = 1, steps = 6\n",
      "02:41:57 [INFO] train episode 3223: winner = 1, steps = 4\n",
      "02:41:59 [INFO] train episode 3224: winner = 1, steps = 6\n",
      "02:42:01 [INFO] train episode 3225: winner = 1, steps = 6\n",
      "02:42:02 [INFO] train episode 3226: winner = 1, steps = 8\n",
      "02:42:05 [INFO] train episode 3227: winner = 0, steps = 8\n",
      "02:42:07 [INFO] train episode 3228: winner = 1, steps = 6\n",
      "02:42:10 [INFO] train episode 3229: winner = -1, steps = 5\n",
      "02:42:13 [INFO] train episode 3230: winner = 1, steps = 6\n",
      "02:42:15 [INFO] train episode 3231: winner = 1, steps = 4\n",
      "02:42:20 [INFO] train episode 3232: winner = -1, steps = 7\n",
      "02:42:27 [INFO] train episode 3233: winner = 0, steps = 8\n",
      "02:42:29 [INFO] train episode 3234: winner = 1, steps = 4\n",
      "02:42:29 [INFO] train episode 3235: winner = 1, steps = 4\n",
      "02:42:30 [INFO] train episode 3236: winner = 1, steps = 6\n",
      "02:42:31 [INFO] train episode 3237: winner = 1, steps = 6\n",
      "02:42:31 [INFO] train episode 3238: winner = 1, steps = 6\n",
      "02:42:33 [INFO] train episode 3239: winner = 1, steps = 4\n",
      "02:42:33 [INFO] train episode 3240: winner = 1, steps = 4\n",
      "02:42:36 [INFO] train episode 3241: winner = 0, steps = 8\n",
      "02:42:38 [INFO] train episode 3242: winner = 0, steps = 8\n",
      "02:42:42 [INFO] train episode 3243: winner = 1, steps = 8\n",
      "02:42:44 [INFO] train episode 3244: winner = 0, steps = 8\n",
      "02:42:44 [INFO] train episode 3245: winner = 1, steps = 4\n",
      "02:42:46 [INFO] train episode 3246: winner = 1, steps = 6\n",
      "02:42:46 [INFO] train episode 3247: winner = 0, steps = 8\n",
      "02:42:51 [INFO] train episode 3248: winner = 0, steps = 8\n",
      "02:42:52 [INFO] train episode 3249: winner = 0, steps = 8\n",
      "02:42:55 [INFO] train episode 3250: winner = 1, steps = 6\n",
      "02:42:56 [INFO] train episode 3251: winner = 1, steps = 4\n",
      "02:43:01 [INFO] train episode 3252: winner = 0, steps = 8\n",
      "02:43:01 [INFO] train episode 3253: winner = 1, steps = 6\n",
      "02:43:02 [INFO] train episode 3254: winner = 1, steps = 6\n",
      "02:43:02 [INFO] train episode 3255: winner = 0, steps = 8\n",
      "02:43:02 [INFO] train episode 3256: winner = 1, steps = 4\n",
      "02:43:03 [INFO] train episode 3257: winner = 1, steps = 6\n",
      "02:43:05 [INFO] train episode 3258: winner = 1, steps = 6\n",
      "02:43:07 [INFO] train episode 3259: winner = 0, steps = 8\n",
      "02:43:08 [INFO] train episode 3260: winner = 0, steps = 8\n",
      "02:43:08 [INFO] train episode 3261: winner = 1, steps = 4\n",
      "02:43:09 [INFO] train episode 3262: winner = 1, steps = 6\n",
      "02:43:14 [INFO] train episode 3263: winner = 1, steps = 8\n",
      "02:43:15 [INFO] train episode 3264: winner = 1, steps = 4\n",
      "02:43:16 [INFO] train episode 3265: winner = 1, steps = 6\n",
      "02:43:17 [INFO] train episode 3266: winner = 0, steps = 8\n",
      "02:43:19 [INFO] train episode 3267: winner = 0, steps = 8\n",
      "02:43:22 [INFO] train episode 3268: winner = -1, steps = 5\n",
      "02:43:22 [INFO] train episode 3269: winner = 1, steps = 6\n",
      "02:43:23 [INFO] train episode 3270: winner = 1, steps = 4\n",
      "02:43:25 [INFO] train episode 3271: winner = 0, steps = 8\n",
      "02:43:26 [INFO] train episode 3272: winner = 1, steps = 4\n",
      "02:43:27 [INFO] train episode 3273: winner = 1, steps = 6\n",
      "02:43:27 [INFO] train episode 3274: winner = 0, steps = 8\n",
      "02:43:27 [INFO] train episode 3275: winner = 0, steps = 8\n",
      "02:43:28 [INFO] train episode 3276: winner = 0, steps = 8\n",
      "02:43:30 [INFO] train episode 3277: winner = 0, steps = 8\n",
      "02:43:30 [INFO] train episode 3278: winner = 1, steps = 4\n",
      "02:43:33 [INFO] train episode 3279: winner = 1, steps = 4\n",
      "02:43:33 [INFO] train episode 3280: winner = 1, steps = 4\n",
      "02:43:34 [INFO] train episode 3281: winner = 1, steps = 6\n",
      "02:43:36 [INFO] train episode 3282: winner = -1, steps = 7\n",
      "02:43:37 [INFO] train episode 3283: winner = 1, steps = 4\n",
      "02:43:37 [INFO] train episode 3284: winner = 0, steps = 8\n",
      "02:43:41 [INFO] train episode 3285: winner = 0, steps = 8\n",
      "02:43:43 [INFO] train episode 3286: winner = 1, steps = 6\n",
      "02:43:43 [INFO] train episode 3287: winner = 1, steps = 4\n",
      "02:43:44 [INFO] train episode 3288: winner = 1, steps = 6\n",
      "02:43:46 [INFO] train episode 3289: winner = -1, steps = 5\n",
      "02:43:46 [INFO] train episode 3290: winner = 1, steps = 6\n",
      "02:43:47 [INFO] train episode 3291: winner = 1, steps = 6\n",
      "02:43:49 [INFO] train episode 3292: winner = 0, steps = 8\n",
      "02:43:50 [INFO] train episode 3293: winner = 1, steps = 4\n",
      "02:43:52 [INFO] train episode 3294: winner = 0, steps = 8\n",
      "02:43:54 [INFO] train episode 3295: winner = -1, steps = 5\n",
      "02:43:55 [INFO] train episode 3296: winner = 0, steps = 8\n",
      "02:43:56 [INFO] train episode 3297: winner = 1, steps = 4\n",
      "02:43:57 [INFO] train episode 3298: winner = 0, steps = 8\n",
      "02:43:57 [INFO] train episode 3299: winner = 1, steps = 4\n",
      "02:43:58 [INFO] train episode 3300: winner = 1, steps = 4\n",
      "02:43:58 [INFO] train episode 3301: winner = 1, steps = 4\n",
      "02:43:59 [INFO] train episode 3302: winner = 1, steps = 6\n",
      "02:43:59 [INFO] train episode 3303: winner = 1, steps = 6\n",
      "02:44:00 [INFO] train episode 3304: winner = 1, steps = 6\n",
      "02:44:00 [INFO] train episode 3305: winner = 1, steps = 4\n",
      "02:44:02 [INFO] train episode 3306: winner = 0, steps = 8\n",
      "02:44:02 [INFO] train episode 3307: winner = 1, steps = 6\n",
      "02:44:02 [INFO] train episode 3308: winner = 1, steps = 4\n",
      "02:44:03 [INFO] train episode 3309: winner = 1, steps = 6\n",
      "02:44:05 [INFO] train episode 3310: winner = 1, steps = 6\n",
      "02:44:06 [INFO] train episode 3311: winner = 1, steps = 4\n",
      "02:44:06 [INFO] train episode 3312: winner = 1, steps = 4\n",
      "02:44:07 [INFO] train episode 3313: winner = 0, steps = 8\n",
      "02:44:08 [INFO] train episode 3314: winner = 0, steps = 8\n",
      "02:44:08 [INFO] train episode 3315: winner = 1, steps = 6\n",
      "02:44:09 [INFO] train episode 3316: winner = 1, steps = 6\n",
      "02:44:09 [INFO] train episode 3317: winner = 1, steps = 6\n",
      "02:44:11 [INFO] train episode 3318: winner = 1, steps = 4\n",
      "02:44:11 [INFO] test episode 3318:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:44:20 [INFO] step 0：player 1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "++o\n",
      "02:44:27 [INFO] step 1：player -1, action (2, 0)\n",
      "+++\n",
      "+++\n",
      "x+o\n",
      "02:44:31 [INFO] step 2：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "x+o\n",
      "02:44:31 [INFO] step 3：player -1, action (0, 1)\n",
      "+x+\n",
      "++o\n",
      "x+o\n",
      "02:44:32 [INFO] step 4：player 1, action (1, 1)\n",
      "+x+\n",
      "+oo\n",
      "x+o\n",
      "02:44:32 [INFO] step 5：player -1, action (2, 1)\n",
      "+x+\n",
      "+oo\n",
      "xxo\n",
      "02:44:33 [INFO] step 6：player 1, action (0, 2)\n",
      "+xo\n",
      "+oo\n",
      "xxo\n",
      "02:44:33 [INFO] test episode 3318: winner = 1, steps = 6\n",
      "02:44:49 [INFO] train episode 3319: winner = 0, steps = 8\n",
      "02:45:03 [INFO] train episode 3320: winner = 0, steps = 8\n",
      "02:45:15 [INFO] train episode 3321: winner = 0, steps = 8\n",
      "02:45:30 [INFO] train episode 3322: winner = 0, steps = 8\n",
      "02:45:32 [INFO] train episode 3323: winner = 1, steps = 4\n",
      "02:45:42 [INFO] train episode 3324: winner = 0, steps = 8\n",
      "02:45:52 [INFO] train episode 3325: winner = 1, steps = 6\n",
      "02:46:00 [INFO] train episode 3326: winner = 1, steps = 6\n",
      "02:46:03 [INFO] train episode 3327: winner = -1, steps = 5\n",
      "02:46:03 [INFO] train episode 3328: winner = 0, steps = 8\n",
      "02:46:05 [INFO] train episode 3329: winner = 1, steps = 4\n",
      "02:46:09 [INFO] train episode 3330: winner = 1, steps = 4\n",
      "02:46:16 [INFO] train episode 3331: winner = 1, steps = 6\n",
      "02:46:26 [INFO] train episode 3332: winner = 1, steps = 6\n",
      "02:46:28 [INFO] train episode 3333: winner = -1, steps = 7\n",
      "02:46:31 [INFO] train episode 3334: winner = 1, steps = 6\n",
      "02:46:41 [INFO] train episode 3335: winner = 1, steps = 6\n",
      "02:46:43 [INFO] train episode 3336: winner = 1, steps = 6\n",
      "02:46:47 [INFO] train episode 3337: winner = 1, steps = 6\n",
      "02:46:49 [INFO] train episode 3338: winner = 1, steps = 6\n",
      "02:46:53 [INFO] train episode 3339: winner = 0, steps = 8\n",
      "02:46:54 [INFO] train episode 3340: winner = 1, steps = 6\n",
      "02:47:01 [INFO] train episode 3341: winner = 1, steps = 8\n",
      "02:47:07 [INFO] train episode 3342: winner = 0, steps = 8\n",
      "02:47:09 [INFO] train episode 3343: winner = 1, steps = 6\n",
      "02:47:14 [INFO] train episode 3344: winner = 0, steps = 8\n",
      "02:47:19 [INFO] train episode 3345: winner = 1, steps = 8\n",
      "02:47:19 [INFO] train episode 3346: winner = 1, steps = 6\n",
      "02:47:26 [INFO] train episode 3347: winner = 0, steps = 8\n",
      "02:47:29 [INFO] train episode 3348: winner = -1, steps = 5\n",
      "02:47:29 [INFO] train episode 3349: winner = 1, steps = 8\n",
      "02:47:30 [INFO] train episode 3350: winner = 0, steps = 8\n",
      "02:47:32 [INFO] train episode 3351: winner = 0, steps = 8\n",
      "02:47:34 [INFO] train episode 3352: winner = 0, steps = 8\n",
      "02:47:43 [INFO] train episode 3353: winner = 0, steps = 8\n",
      "02:47:46 [INFO] train episode 3354: winner = 1, steps = 4\n",
      "02:47:49 [INFO] train episode 3355: winner = 1, steps = 8\n",
      "02:47:53 [INFO] train episode 3356: winner = 0, steps = 8\n",
      "02:47:55 [INFO] train episode 3357: winner = -1, steps = 5\n",
      "02:47:58 [INFO] train episode 3358: winner = 1, steps = 6\n",
      "02:47:58 [INFO] train episode 3359: winner = 1, steps = 6\n",
      "02:47:58 [INFO] train episode 3360: winner = 1, steps = 6\n",
      "02:47:59 [INFO] train episode 3361: winner = 1, steps = 6\n",
      "02:48:00 [INFO] train episode 3362: winner = 0, steps = 8\n",
      "02:48:04 [INFO] train episode 3363: winner = -1, steps = 5\n",
      "02:48:06 [INFO] train episode 3364: winner = 1, steps = 4\n",
      "02:48:08 [INFO] train episode 3365: winner = 1, steps = 6\n",
      "02:48:09 [INFO] train episode 3366: winner = 1, steps = 6\n",
      "02:48:11 [INFO] train episode 3367: winner = 0, steps = 8\n",
      "02:48:15 [INFO] train episode 3368: winner = -1, steps = 7\n",
      "02:48:18 [INFO] train episode 3369: winner = 0, steps = 8\n",
      "02:48:19 [INFO] train episode 3370: winner = 1, steps = 4\n",
      "02:48:20 [INFO] train episode 3371: winner = 1, steps = 4\n",
      "02:48:23 [INFO] train episode 3372: winner = 0, steps = 8\n",
      "02:48:25 [INFO] train episode 3373: winner = 1, steps = 8\n",
      "02:48:25 [INFO] train episode 3374: winner = -1, steps = 5\n",
      "02:48:27 [INFO] train episode 3375: winner = 1, steps = 6\n",
      "02:48:29 [INFO] train episode 3376: winner = 1, steps = 6\n",
      "02:48:29 [INFO] train episode 3377: winner = 0, steps = 8\n",
      "02:48:38 [INFO] train episode 3378: winner = 0, steps = 8\n",
      "02:48:39 [INFO] train episode 3379: winner = 0, steps = 8\n",
      "02:48:41 [INFO] train episode 3380: winner = 0, steps = 8\n",
      "02:48:42 [INFO] train episode 3381: winner = 0, steps = 8\n",
      "02:48:43 [INFO] train episode 3382: winner = 0, steps = 8\n",
      "02:48:44 [INFO] train episode 3383: winner = 1, steps = 8\n",
      "02:48:45 [INFO] train episode 3384: winner = 0, steps = 8\n",
      "02:48:49 [INFO] train episode 3385: winner = 0, steps = 8\n",
      "02:48:49 [INFO] train episode 3386: winner = 1, steps = 4\n",
      "02:48:49 [INFO] train episode 3387: winner = 0, steps = 8\n",
      "02:48:54 [INFO] train episode 3388: winner = 1, steps = 6\n",
      "02:48:54 [INFO] train episode 3389: winner = -1, steps = 7\n",
      "02:48:56 [INFO] train episode 3390: winner = 1, steps = 6\n",
      "02:48:57 [INFO] train episode 3391: winner = 1, steps = 8\n",
      "02:48:57 [INFO] train episode 3392: winner = 1, steps = 6\n",
      "02:48:58 [INFO] train episode 3393: winner = 0, steps = 8\n",
      "02:49:00 [INFO] train episode 3394: winner = 0, steps = 8\n",
      "02:49:01 [INFO] train episode 3395: winner = -1, steps = 7\n",
      "02:49:03 [INFO] train episode 3396: winner = 1, steps = 4\n",
      "02:49:03 [INFO] train episode 3397: winner = 1, steps = 6\n",
      "02:49:04 [INFO] train episode 3398: winner = 0, steps = 8\n",
      "02:49:09 [INFO] train episode 3399: winner = 0, steps = 8\n",
      "02:49:11 [INFO] train episode 3400: winner = 1, steps = 4\n",
      "02:49:11 [INFO] train episode 3401: winner = 0, steps = 8\n",
      "02:49:12 [INFO] train episode 3402: winner = 0, steps = 8\n",
      "02:49:12 [INFO] train episode 3403: winner = 1, steps = 6\n",
      "02:49:13 [INFO] train episode 3404: winner = 1, steps = 6\n",
      "02:49:13 [INFO] train episode 3405: winner = 1, steps = 6\n",
      "02:49:14 [INFO] train episode 3406: winner = 1, steps = 6\n",
      "02:49:16 [INFO] train episode 3407: winner = 1, steps = 6\n",
      "02:49:18 [INFO] train episode 3408: winner = 0, steps = 8\n",
      "02:49:19 [INFO] train episode 3409: winner = 0, steps = 8\n",
      "02:49:19 [INFO] train episode 3410: winner = 1, steps = 6\n",
      "02:49:21 [INFO] train episode 3411: winner = 0, steps = 8\n",
      "02:49:21 [INFO] train episode 3412: winner = 1, steps = 6\n",
      "02:49:22 [INFO] train episode 3413: winner = 1, steps = 6\n",
      "02:49:24 [INFO] train episode 3414: winner = 1, steps = 4\n",
      "02:49:25 [INFO] train episode 3415: winner = -1, steps = 7\n",
      "02:49:25 [INFO] train episode 3416: winner = 0, steps = 8\n",
      "02:49:25 [INFO] train episode 3417: winner = 1, steps = 4\n",
      "02:49:26 [INFO] train episode 3418: winner = 1, steps = 6\n",
      "02:49:27 [INFO] train episode 3419: winner = 0, steps = 8\n",
      "02:49:28 [INFO] train episode 3420: winner = 1, steps = 8\n",
      "02:49:30 [INFO] train episode 3421: winner = -1, steps = 7\n",
      "02:49:33 [INFO] train episode 3422: winner = 1, steps = 6\n",
      "02:49:33 [INFO] train episode 3423: winner = 1, steps = 6\n",
      "02:49:34 [INFO] train episode 3424: winner = 1, steps = 4\n",
      "02:49:34 [INFO] train episode 3425: winner = 1, steps = 4\n",
      "02:49:36 [INFO] train episode 3426: winner = 0, steps = 8\n",
      "02:49:37 [INFO] train episode 3427: winner = 1, steps = 6\n",
      "02:49:37 [INFO] train episode 3428: winner = 1, steps = 4\n",
      "02:49:39 [INFO] train episode 3429: winner = 1, steps = 6\n",
      "02:49:40 [INFO] train episode 3430: winner = 0, steps = 8\n",
      "02:49:43 [INFO] train episode 3431: winner = 1, steps = 8\n",
      "02:49:43 [INFO] train episode 3432: winner = 1, steps = 6\n",
      "02:49:43 [INFO] train episode 3433: winner = 1, steps = 4\n",
      "02:49:44 [INFO] train episode 3434: winner = 0, steps = 8\n",
      "02:49:44 [INFO] train episode 3435: winner = 0, steps = 8\n",
      "02:49:44 [INFO] train episode 3436: winner = -1, steps = 7\n",
      "02:49:45 [INFO] train episode 3437: winner = 0, steps = 8\n",
      "02:49:45 [INFO] train episode 3438: winner = 0, steps = 8\n",
      "02:49:47 [INFO] train episode 3439: winner = -1, steps = 5\n",
      "02:49:50 [INFO] train episode 3440: winner = 0, steps = 8\n",
      "02:49:51 [INFO] train episode 3441: winner = 1, steps = 6\n",
      "02:49:51 [INFO] train episode 3442: winner = 1, steps = 4\n",
      "02:49:51 [INFO] train episode 3443: winner = 1, steps = 6\n",
      "02:49:54 [INFO] train episode 3444: winner = 1, steps = 6\n",
      "02:49:54 [INFO] train episode 3445: winner = -1, steps = 5\n",
      "02:49:54 [INFO] train episode 3446: winner = 1, steps = 6\n",
      "02:49:55 [INFO] train episode 3447: winner = 1, steps = 4\n",
      "02:49:56 [INFO] train episode 3448: winner = 0, steps = 8\n",
      "02:49:57 [INFO] train episode 3449: winner = 1, steps = 6\n",
      "02:49:58 [INFO] train episode 3450: winner = 0, steps = 8\n",
      "02:49:58 [INFO] test episode 3450:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:50:07 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "02:50:15 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "+o+\n",
      "+++\n",
      "02:50:22 [INFO] step 2：player 1, action (2, 1)\n",
      "++x\n",
      "+o+\n",
      "+o+\n",
      "02:50:24 [INFO] step 3：player -1, action (0, 1)\n",
      "+xx\n",
      "+o+\n",
      "+o+\n",
      "02:50:24 [INFO] step 4：player 1, action (0, 0)\n",
      "oxx\n",
      "+o+\n",
      "+o+\n",
      "02:50:24 [INFO] step 5：player -1, action (2, 2)\n",
      "oxx\n",
      "+o+\n",
      "+ox\n",
      "02:50:24 [INFO] step 6：player 1, action (1, 2)\n",
      "oxx\n",
      "+oo\n",
      "+ox\n",
      "02:50:24 [INFO] step 7：player -1, action (1, 0)\n",
      "oxx\n",
      "xoo\n",
      "+ox\n",
      "02:50:24 [INFO] step 8：player 1, action (2, 0)\n",
      "oxx\n",
      "xoo\n",
      "oox\n",
      "02:50:24 [INFO] test episode 3450: winner = 0, steps = 8\n",
      "02:50:34 [INFO] train episode 3451: winner = 1, steps = 6\n",
      "02:50:47 [INFO] train episode 3452: winner = 1, steps = 6\n",
      "02:51:01 [INFO] train episode 3453: winner = 1, steps = 6\n",
      "02:51:22 [INFO] train episode 3454: winner = 0, steps = 8\n",
      "02:51:25 [INFO] train episode 3455: winner = 0, steps = 8\n",
      "02:51:31 [INFO] train episode 3456: winner = 0, steps = 8\n",
      "02:51:40 [INFO] train episode 3457: winner = 0, steps = 8\n",
      "02:51:42 [INFO] train episode 3458: winner = 1, steps = 4\n",
      "02:51:51 [INFO] train episode 3459: winner = 1, steps = 8\n",
      "02:52:00 [INFO] train episode 3460: winner = 0, steps = 8\n",
      "02:52:06 [INFO] train episode 3461: winner = 1, steps = 8\n",
      "02:52:12 [INFO] train episode 3462: winner = -1, steps = 7\n",
      "02:52:17 [INFO] train episode 3463: winner = 0, steps = 8\n",
      "02:52:20 [INFO] train episode 3464: winner = 1, steps = 6\n",
      "02:52:24 [INFO] train episode 3465: winner = 0, steps = 8\n",
      "02:52:25 [INFO] train episode 3466: winner = 1, steps = 6\n",
      "02:52:38 [INFO] train episode 3467: winner = 0, steps = 8\n",
      "02:52:43 [INFO] train episode 3468: winner = -1, steps = 5\n",
      "02:52:47 [INFO] train episode 3469: winner = 1, steps = 6\n",
      "02:52:48 [INFO] train episode 3470: winner = 1, steps = 4\n",
      "02:52:55 [INFO] train episode 3471: winner = 0, steps = 8\n",
      "02:53:04 [INFO] train episode 3472: winner = 0, steps = 8\n",
      "02:53:08 [INFO] train episode 3473: winner = 1, steps = 6\n",
      "02:53:10 [INFO] train episode 3474: winner = 1, steps = 6\n",
      "02:53:16 [INFO] train episode 3475: winner = 0, steps = 8\n",
      "02:53:19 [INFO] train episode 3476: winner = 1, steps = 6\n",
      "02:53:19 [INFO] train episode 3477: winner = 1, steps = 4\n",
      "02:53:27 [INFO] train episode 3478: winner = 0, steps = 8\n",
      "02:53:27 [INFO] train episode 3479: winner = 0, steps = 8\n",
      "02:53:30 [INFO] train episode 3480: winner = 0, steps = 8\n",
      "02:53:36 [INFO] train episode 3481: winner = 0, steps = 8\n",
      "02:53:37 [INFO] train episode 3482: winner = 1, steps = 4\n",
      "02:53:37 [INFO] train episode 3483: winner = 1, steps = 6\n",
      "02:53:39 [INFO] train episode 3484: winner = 1, steps = 6\n",
      "02:53:41 [INFO] train episode 3485: winner = 1, steps = 6\n",
      "02:53:41 [INFO] train episode 3486: winner = 1, steps = 4\n",
      "02:53:44 [INFO] train episode 3487: winner = 1, steps = 6\n",
      "02:53:48 [INFO] train episode 3488: winner = 1, steps = 8\n",
      "02:53:51 [INFO] train episode 3489: winner = 0, steps = 8\n",
      "02:53:51 [INFO] train episode 3490: winner = 1, steps = 4\n",
      "02:53:51 [INFO] train episode 3491: winner = 0, steps = 8\n",
      "02:53:53 [INFO] train episode 3492: winner = 1, steps = 6\n",
      "02:53:54 [INFO] train episode 3493: winner = 0, steps = 8\n",
      "02:53:54 [INFO] train episode 3494: winner = 0, steps = 8\n",
      "02:53:55 [INFO] train episode 3495: winner = 0, steps = 8\n",
      "02:53:57 [INFO] train episode 3496: winner = 0, steps = 8\n",
      "02:53:57 [INFO] train episode 3497: winner = 0, steps = 8\n",
      "02:53:59 [INFO] train episode 3498: winner = 1, steps = 4\n",
      "02:54:04 [INFO] train episode 3499: winner = 0, steps = 8\n",
      "02:54:05 [INFO] train episode 3500: winner = 1, steps = 6\n",
      "02:54:09 [INFO] train episode 3501: winner = -1, steps = 7\n",
      "02:54:11 [INFO] train episode 3502: winner = 1, steps = 4\n",
      "02:54:15 [INFO] train episode 3503: winner = 0, steps = 8\n",
      "02:54:20 [INFO] train episode 3504: winner = 0, steps = 8\n",
      "02:54:23 [INFO] train episode 3505: winner = 1, steps = 8\n",
      "02:54:25 [INFO] train episode 3506: winner = 0, steps = 8\n",
      "02:54:29 [INFO] train episode 3507: winner = 1, steps = 6\n",
      "02:54:30 [INFO] train episode 3508: winner = -1, steps = 5\n",
      "02:54:33 [INFO] train episode 3509: winner = -1, steps = 7\n",
      "02:54:33 [INFO] train episode 3510: winner = 0, steps = 8\n",
      "02:54:34 [INFO] train episode 3511: winner = 0, steps = 8\n",
      "02:54:35 [INFO] train episode 3512: winner = 0, steps = 8\n",
      "02:54:36 [INFO] train episode 3513: winner = 1, steps = 6\n",
      "02:54:41 [INFO] train episode 3514: winner = 0, steps = 8\n",
      "02:54:41 [INFO] train episode 3515: winner = 1, steps = 4\n",
      "02:54:42 [INFO] train episode 3516: winner = 1, steps = 6\n",
      "02:54:44 [INFO] train episode 3517: winner = 1, steps = 4\n",
      "02:54:46 [INFO] train episode 3518: winner = 1, steps = 4\n",
      "02:54:47 [INFO] train episode 3519: winner = 1, steps = 4\n",
      "02:54:47 [INFO] train episode 3520: winner = 0, steps = 8\n",
      "02:54:50 [INFO] train episode 3521: winner = 1, steps = 6\n",
      "02:54:51 [INFO] train episode 3522: winner = 1, steps = 6\n",
      "02:54:52 [INFO] train episode 3523: winner = 0, steps = 8\n",
      "02:54:54 [INFO] train episode 3524: winner = 0, steps = 8\n",
      "02:54:55 [INFO] train episode 3525: winner = 0, steps = 8\n",
      "02:54:57 [INFO] train episode 3526: winner = 1, steps = 6\n",
      "02:54:58 [INFO] train episode 3527: winner = 1, steps = 6\n",
      "02:54:59 [INFO] train episode 3528: winner = 1, steps = 6\n",
      "02:55:02 [INFO] train episode 3529: winner = 1, steps = 8\n",
      "02:55:06 [INFO] train episode 3530: winner = 1, steps = 6\n",
      "02:55:09 [INFO] train episode 3531: winner = 0, steps = 8\n",
      "02:55:11 [INFO] train episode 3532: winner = 0, steps = 8\n",
      "02:55:13 [INFO] train episode 3533: winner = 0, steps = 8\n",
      "02:55:15 [INFO] train episode 3534: winner = 1, steps = 6\n",
      "02:55:15 [INFO] train episode 3535: winner = 1, steps = 4\n",
      "02:55:15 [INFO] train episode 3536: winner = 0, steps = 8\n",
      "02:55:17 [INFO] train episode 3537: winner = -1, steps = 7\n",
      "02:55:17 [INFO] train episode 3538: winner = 1, steps = 4\n",
      "02:55:19 [INFO] train episode 3539: winner = 0, steps = 8\n",
      "02:55:19 [INFO] train episode 3540: winner = 1, steps = 6\n",
      "02:55:22 [INFO] train episode 3541: winner = 1, steps = 8\n",
      "02:55:25 [INFO] train episode 3542: winner = 1, steps = 6\n",
      "02:55:26 [INFO] train episode 3543: winner = 1, steps = 6\n",
      "02:55:26 [INFO] train episode 3544: winner = 0, steps = 8\n",
      "02:55:27 [INFO] train episode 3545: winner = 0, steps = 8\n",
      "02:55:28 [INFO] train episode 3546: winner = 1, steps = 4\n",
      "02:55:29 [INFO] train episode 3547: winner = 1, steps = 8\n",
      "02:55:29 [INFO] train episode 3548: winner = 1, steps = 4\n",
      "02:55:29 [INFO] train episode 3549: winner = 0, steps = 8\n",
      "02:55:30 [INFO] train episode 3550: winner = 1, steps = 6\n",
      "02:55:31 [INFO] train episode 3551: winner = 1, steps = 6\n",
      "02:55:33 [INFO] train episode 3552: winner = 1, steps = 6\n",
      "02:55:33 [INFO] train episode 3553: winner = 1, steps = 6\n",
      "02:55:34 [INFO] train episode 3554: winner = 0, steps = 8\n",
      "02:55:34 [INFO] train episode 3555: winner = 1, steps = 4\n",
      "02:55:34 [INFO] train episode 3556: winner = 1, steps = 6\n",
      "02:55:35 [INFO] train episode 3557: winner = 0, steps = 8\n",
      "02:55:35 [INFO] train episode 3558: winner = 0, steps = 8\n",
      "02:55:37 [INFO] train episode 3559: winner = -1, steps = 5\n",
      "02:55:38 [INFO] train episode 3560: winner = 0, steps = 8\n",
      "02:55:39 [INFO] train episode 3561: winner = 1, steps = 6\n",
      "02:55:40 [INFO] train episode 3562: winner = 1, steps = 4\n",
      "02:55:41 [INFO] train episode 3563: winner = 1, steps = 4\n",
      "02:55:45 [INFO] train episode 3564: winner = 0, steps = 8\n",
      "02:55:49 [INFO] train episode 3565: winner = 0, steps = 8\n",
      "02:55:49 [INFO] train episode 3566: winner = 1, steps = 4\n",
      "02:55:50 [INFO] train episode 3567: winner = 1, steps = 4\n",
      "02:55:51 [INFO] train episode 3568: winner = 1, steps = 6\n",
      "02:55:51 [INFO] train episode 3569: winner = 1, steps = 6\n",
      "02:55:51 [INFO] train episode 3570: winner = 1, steps = 4\n",
      "02:55:54 [INFO] train episode 3571: winner = 1, steps = 6\n",
      "02:55:55 [INFO] train episode 3572: winner = 0, steps = 8\n",
      "02:55:56 [INFO] train episode 3573: winner = 0, steps = 8\n",
      "02:55:56 [INFO] train episode 3574: winner = 1, steps = 6\n",
      "02:55:56 [INFO] train episode 3575: winner = 1, steps = 6\n",
      "02:55:57 [INFO] train episode 3576: winner = 1, steps = 6\n",
      "02:55:57 [INFO] train episode 3577: winner = 1, steps = 4\n",
      "02:55:58 [INFO] train episode 3578: winner = 0, steps = 8\n",
      "02:56:03 [INFO] train episode 3579: winner = -1, steps = 7\n",
      "02:56:05 [INFO] train episode 3580: winner = 1, steps = 6\n",
      "02:56:10 [INFO] train episode 3581: winner = 0, steps = 8\n",
      "02:56:13 [INFO] train episode 3582: winner = 0, steps = 8\n",
      "02:56:13 [INFO] test episode 3582:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:56:22 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "02:56:30 [INFO] step 1：player -1, action (2, 2)\n",
      "+++\n",
      "+o+\n",
      "++x\n",
      "02:56:36 [INFO] step 2：player 1, action (1, 2)\n",
      "+++\n",
      "+oo\n",
      "++x\n",
      "02:56:37 [INFO] step 3：player -1, action (1, 0)\n",
      "+++\n",
      "xoo\n",
      "++x\n",
      "02:56:38 [INFO] step 4：player 1, action (0, 2)\n",
      "++o\n",
      "xoo\n",
      "++x\n",
      "02:56:38 [INFO] step 5：player -1, action (2, 0)\n",
      "++o\n",
      "xoo\n",
      "x+x\n",
      "02:56:38 [INFO] step 6：player 1, action (0, 0)\n",
      "o+o\n",
      "xoo\n",
      "x+x\n",
      "02:56:38 [INFO] step 7：player -1, action (2, 1)\n",
      "o+o\n",
      "xoo\n",
      "xxx\n",
      "02:56:38 [INFO] test episode 3582: winner = -1, steps = 7\n",
      "02:56:50 [INFO] train episode 3583: winner = 1, steps = 6\n",
      "02:56:53 [INFO] train episode 3584: winner = 1, steps = 4\n",
      "02:57:03 [INFO] train episode 3585: winner = 1, steps = 4\n",
      "02:57:12 [INFO] train episode 3586: winner = 1, steps = 6\n",
      "02:57:14 [INFO] train episode 3587: winner = 1, steps = 6\n",
      "02:57:30 [INFO] train episode 3588: winner = 1, steps = 4\n",
      "02:57:35 [INFO] train episode 3589: winner = 0, steps = 8\n",
      "02:57:39 [INFO] train episode 3590: winner = 1, steps = 4\n",
      "02:57:43 [INFO] train episode 3591: winner = 1, steps = 6\n",
      "02:57:52 [INFO] train episode 3592: winner = -1, steps = 7\n",
      "02:57:55 [INFO] train episode 3593: winner = 1, steps = 6\n",
      "02:58:03 [INFO] train episode 3594: winner = 1, steps = 4\n",
      "02:58:13 [INFO] train episode 3595: winner = 0, steps = 8\n",
      "02:58:17 [INFO] train episode 3596: winner = 1, steps = 6\n",
      "02:58:21 [INFO] train episode 3597: winner = 1, steps = 4\n",
      "02:58:25 [INFO] train episode 3598: winner = 1, steps = 4\n",
      "02:58:25 [INFO] train episode 3599: winner = 1, steps = 6\n",
      "02:58:26 [INFO] train episode 3600: winner = 1, steps = 4\n",
      "02:58:28 [INFO] train episode 3601: winner = 1, steps = 6\n",
      "02:58:30 [INFO] train episode 3602: winner = -1, steps = 7\n",
      "02:58:32 [INFO] train episode 3603: winner = 1, steps = 4\n",
      "02:58:37 [INFO] train episode 3604: winner = 0, steps = 8\n",
      "02:58:39 [INFO] train episode 3605: winner = 1, steps = 6\n",
      "02:58:43 [INFO] train episode 3606: winner = 1, steps = 6\n",
      "02:58:49 [INFO] train episode 3607: winner = 0, steps = 8\n",
      "02:58:57 [INFO] train episode 3608: winner = 1, steps = 4\n",
      "02:59:09 [INFO] train episode 3609: winner = 1, steps = 6\n",
      "02:59:10 [INFO] train episode 3610: winner = 1, steps = 6\n",
      "02:59:10 [INFO] train episode 3611: winner = 1, steps = 6\n",
      "02:59:13 [INFO] train episode 3612: winner = 1, steps = 4\n",
      "02:59:14 [INFO] train episode 3613: winner = 1, steps = 6\n",
      "02:59:17 [INFO] train episode 3614: winner = 0, steps = 8\n",
      "02:59:19 [INFO] train episode 3615: winner = 1, steps = 6\n",
      "02:59:19 [INFO] train episode 3616: winner = 1, steps = 4\n",
      "02:59:20 [INFO] train episode 3617: winner = 1, steps = 4\n",
      "02:59:21 [INFO] train episode 3618: winner = 1, steps = 6\n",
      "02:59:24 [INFO] train episode 3619: winner = 1, steps = 6\n",
      "02:59:26 [INFO] train episode 3620: winner = 1, steps = 6\n",
      "02:59:29 [INFO] train episode 3621: winner = 1, steps = 6\n",
      "02:59:30 [INFO] train episode 3622: winner = 1, steps = 4\n",
      "02:59:34 [INFO] train episode 3623: winner = -1, steps = 5\n",
      "02:59:34 [INFO] train episode 3624: winner = -1, steps = 7\n",
      "02:59:39 [INFO] train episode 3625: winner = 0, steps = 8\n",
      "02:59:42 [INFO] train episode 3626: winner = 0, steps = 8\n",
      "02:59:42 [INFO] train episode 3627: winner = 1, steps = 6\n",
      "02:59:44 [INFO] train episode 3628: winner = -1, steps = 7\n",
      "02:59:48 [INFO] train episode 3629: winner = 0, steps = 8\n",
      "02:59:50 [INFO] train episode 3630: winner = 1, steps = 6\n",
      "02:59:50 [INFO] train episode 3631: winner = 1, steps = 6\n",
      "02:59:51 [INFO] train episode 3632: winner = 1, steps = 4\n",
      "02:59:57 [INFO] train episode 3633: winner = 1, steps = 6\n",
      "03:00:00 [INFO] train episode 3634: winner = -1, steps = 7\n",
      "03:00:03 [INFO] train episode 3635: winner = 0, steps = 8\n",
      "03:00:06 [INFO] train episode 3636: winner = 1, steps = 4\n",
      "03:00:06 [INFO] train episode 3637: winner = 0, steps = 8\n",
      "03:00:07 [INFO] train episode 3638: winner = 0, steps = 8\n",
      "03:00:07 [INFO] train episode 3639: winner = 0, steps = 8\n",
      "03:00:08 [INFO] train episode 3640: winner = 0, steps = 8\n",
      "03:00:09 [INFO] train episode 3641: winner = 0, steps = 8\n",
      "03:00:10 [INFO] train episode 3642: winner = 0, steps = 8\n",
      "03:00:15 [INFO] train episode 3643: winner = -1, steps = 7\n",
      "03:00:17 [INFO] train episode 3644: winner = 0, steps = 8\n",
      "03:00:18 [INFO] train episode 3645: winner = 1, steps = 6\n",
      "03:00:19 [INFO] train episode 3646: winner = 1, steps = 6\n",
      "03:00:20 [INFO] train episode 3647: winner = 0, steps = 8\n",
      "03:00:24 [INFO] train episode 3648: winner = 0, steps = 8\n",
      "03:00:27 [INFO] train episode 3649: winner = 1, steps = 4\n",
      "03:00:30 [INFO] train episode 3650: winner = 1, steps = 6\n",
      "03:00:33 [INFO] train episode 3651: winner = 0, steps = 8\n",
      "03:00:34 [INFO] train episode 3652: winner = 1, steps = 6\n",
      "03:00:37 [INFO] train episode 3653: winner = 0, steps = 8\n",
      "03:00:38 [INFO] train episode 3654: winner = 1, steps = 4\n",
      "03:00:43 [INFO] train episode 3655: winner = 1, steps = 8\n",
      "03:00:43 [INFO] train episode 3656: winner = 1, steps = 4\n",
      "03:00:46 [INFO] train episode 3657: winner = 1, steps = 6\n",
      "03:00:46 [INFO] train episode 3658: winner = 1, steps = 4\n",
      "03:00:46 [INFO] train episode 3659: winner = 1, steps = 4\n",
      "03:00:48 [INFO] train episode 3660: winner = 1, steps = 6\n",
      "03:00:50 [INFO] train episode 3661: winner = 0, steps = 8\n",
      "03:00:50 [INFO] train episode 3662: winner = 1, steps = 6\n",
      "03:00:50 [INFO] train episode 3663: winner = 1, steps = 6\n",
      "03:00:50 [INFO] train episode 3664: winner = 1, steps = 6\n",
      "03:00:52 [INFO] train episode 3665: winner = 1, steps = 8\n",
      "03:00:54 [INFO] train episode 3666: winner = -1, steps = 7\n",
      "03:00:55 [INFO] train episode 3667: winner = 0, steps = 8\n",
      "03:00:57 [INFO] train episode 3668: winner = 1, steps = 6\n",
      "03:00:58 [INFO] train episode 3669: winner = 1, steps = 4\n",
      "03:00:59 [INFO] train episode 3670: winner = 1, steps = 4\n",
      "03:00:59 [INFO] train episode 3671: winner = 1, steps = 6\n",
      "03:00:59 [INFO] train episode 3672: winner = 1, steps = 6\n",
      "03:01:00 [INFO] train episode 3673: winner = 0, steps = 8\n",
      "03:01:00 [INFO] train episode 3674: winner = -1, steps = 5\n",
      "03:01:00 [INFO] train episode 3675: winner = 1, steps = 4\n",
      "03:01:01 [INFO] train episode 3676: winner = 1, steps = 4\n",
      "03:01:01 [INFO] train episode 3677: winner = 0, steps = 8\n",
      "03:01:02 [INFO] train episode 3678: winner = 0, steps = 8\n",
      "03:01:05 [INFO] train episode 3679: winner = 0, steps = 8\n",
      "03:01:07 [INFO] train episode 3680: winner = 0, steps = 8\n",
      "03:01:08 [INFO] train episode 3681: winner = 1, steps = 4\n",
      "03:01:08 [INFO] train episode 3682: winner = 1, steps = 6\n",
      "03:01:10 [INFO] train episode 3683: winner = -1, steps = 5\n",
      "03:01:12 [INFO] train episode 3684: winner = 1, steps = 4\n",
      "03:01:13 [INFO] train episode 3685: winner = 1, steps = 4\n",
      "03:01:17 [INFO] train episode 3686: winner = 0, steps = 8\n",
      "03:01:20 [INFO] train episode 3687: winner = 1, steps = 8\n",
      "03:01:21 [INFO] train episode 3688: winner = 1, steps = 6\n",
      "03:01:21 [INFO] train episode 3689: winner = 0, steps = 8\n",
      "03:01:22 [INFO] train episode 3690: winner = 0, steps = 8\n",
      "03:01:24 [INFO] train episode 3691: winner = 1, steps = 4\n",
      "03:01:31 [INFO] train episode 3692: winner = 1, steps = 8\n",
      "03:01:31 [INFO] train episode 3693: winner = -1, steps = 5\n",
      "03:01:31 [INFO] train episode 3694: winner = 1, steps = 6\n",
      "03:01:32 [INFO] train episode 3695: winner = 1, steps = 6\n",
      "03:01:32 [INFO] train episode 3696: winner = 1, steps = 4\n",
      "03:01:37 [INFO] train episode 3697: winner = 0, steps = 8\n",
      "03:01:37 [INFO] train episode 3698: winner = 1, steps = 6\n",
      "03:01:37 [INFO] train episode 3699: winner = 1, steps = 6\n",
      "03:01:37 [INFO] train episode 3700: winner = 1, steps = 6\n",
      "03:01:38 [INFO] train episode 3701: winner = 1, steps = 6\n",
      "03:01:39 [INFO] train episode 3702: winner = 0, steps = 8\n",
      "03:01:39 [INFO] train episode 3703: winner = 1, steps = 6\n",
      "03:01:39 [INFO] train episode 3704: winner = -1, steps = 5\n",
      "03:01:41 [INFO] train episode 3705: winner = 0, steps = 8\n",
      "03:01:44 [INFO] train episode 3706: winner = -1, steps = 7\n",
      "03:01:44 [INFO] train episode 3707: winner = 1, steps = 6\n",
      "03:01:46 [INFO] train episode 3708: winner = 1, steps = 4\n",
      "03:01:46 [INFO] train episode 3709: winner = 1, steps = 4\n",
      "03:01:46 [INFO] train episode 3710: winner = 0, steps = 8\n",
      "03:01:48 [INFO] train episode 3711: winner = -1, steps = 5\n",
      "03:01:48 [INFO] train episode 3712: winner = 1, steps = 6\n",
      "03:01:48 [INFO] train episode 3713: winner = 1, steps = 6\n",
      "03:01:48 [INFO] train episode 3714: winner = 1, steps = 6\n",
      "03:01:48 [INFO] train episode 3715: winner = 0, steps = 8\n",
      "03:01:49 [INFO] train episode 3716: winner = 1, steps = 6\n",
      "03:01:54 [INFO] train episode 3717: winner = 0, steps = 8\n",
      "03:01:55 [INFO] train episode 3718: winner = 1, steps = 8\n",
      "03:01:56 [INFO] train episode 3719: winner = 1, steps = 6\n",
      "03:01:57 [INFO] train episode 3720: winner = 0, steps = 8\n",
      "03:01:57 [INFO] train episode 3721: winner = 0, steps = 8\n",
      "03:01:59 [INFO] train episode 3722: winner = 0, steps = 8\n",
      "03:01:59 [INFO] test episode 3722:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:02:08 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "03:02:16 [INFO] step 1：player -1, action (1, 0)\n",
      "+++\n",
      "xo+\n",
      "+++\n",
      "03:02:19 [INFO] step 2：player 1, action (1, 2)\n",
      "+++\n",
      "xoo\n",
      "+++\n",
      "03:02:24 [INFO] step 3：player -1, action (0, 0)\n",
      "x++\n",
      "xoo\n",
      "+++\n",
      "03:02:25 [INFO] step 4：player 1, action (2, 0)\n",
      "x++\n",
      "xoo\n",
      "o++\n",
      "03:02:25 [INFO] step 5：player -1, action (0, 2)\n",
      "x+x\n",
      "xoo\n",
      "o++\n",
      "03:02:25 [INFO] step 6：player 1, action (0, 1)\n",
      "xox\n",
      "xoo\n",
      "o++\n",
      "03:02:25 [INFO] step 7：player -1, action (2, 1)\n",
      "xox\n",
      "xoo\n",
      "ox+\n",
      "03:02:25 [INFO] step 8：player 1, action (2, 2)\n",
      "xox\n",
      "xoo\n",
      "oxo\n",
      "03:02:25 [INFO] test episode 3722: winner = 0, steps = 8\n",
      "03:02:35 [INFO] train episode 3723: winner = 1, steps = 6\n",
      "03:02:38 [INFO] train episode 3724: winner = 1, steps = 4\n",
      "03:02:54 [INFO] train episode 3725: winner = 1, steps = 6\n",
      "03:03:09 [INFO] train episode 3726: winner = 1, steps = 6\n",
      "03:03:15 [INFO] train episode 3727: winner = 1, steps = 4\n",
      "03:03:27 [INFO] train episode 3728: winner = 1, steps = 4\n",
      "03:03:35 [INFO] train episode 3729: winner = -1, steps = 7\n",
      "03:03:51 [INFO] train episode 3730: winner = 1, steps = 6\n",
      "03:03:56 [INFO] train episode 3731: winner = 0, steps = 8\n",
      "03:03:58 [INFO] train episode 3732: winner = 1, steps = 4\n",
      "03:04:03 [INFO] train episode 3733: winner = 0, steps = 8\n",
      "03:04:06 [INFO] train episode 3734: winner = 0, steps = 8\n",
      "03:04:07 [INFO] train episode 3735: winner = 0, steps = 8\n",
      "03:04:15 [INFO] train episode 3736: winner = 0, steps = 8\n",
      "03:04:16 [INFO] train episode 3737: winner = -1, steps = 5\n",
      "03:04:19 [INFO] train episode 3738: winner = 1, steps = 6\n",
      "03:04:25 [INFO] train episode 3739: winner = 1, steps = 4\n",
      "03:04:30 [INFO] train episode 3740: winner = 0, steps = 8\n",
      "03:04:31 [INFO] train episode 3741: winner = 1, steps = 6\n",
      "03:04:35 [INFO] train episode 3742: winner = 0, steps = 8\n",
      "03:04:42 [INFO] train episode 3743: winner = 1, steps = 4\n",
      "03:04:45 [INFO] train episode 3744: winner = 1, steps = 6\n",
      "03:04:49 [INFO] train episode 3745: winner = 0, steps = 8\n",
      "03:04:54 [INFO] train episode 3746: winner = 0, steps = 8\n",
      "03:04:57 [INFO] train episode 3747: winner = -1, steps = 7\n",
      "03:04:58 [INFO] train episode 3748: winner = 1, steps = 6\n",
      "03:05:03 [INFO] train episode 3749: winner = -1, steps = 7\n",
      "03:05:07 [INFO] train episode 3750: winner = -1, steps = 5\n",
      "03:05:08 [INFO] train episode 3751: winner = 1, steps = 4\n",
      "03:05:13 [INFO] train episode 3752: winner = 1, steps = 8\n",
      "03:05:16 [INFO] train episode 3753: winner = -1, steps = 5\n",
      "03:05:17 [INFO] train episode 3754: winner = 0, steps = 8\n",
      "03:05:19 [INFO] train episode 3755: winner = -1, steps = 5\n",
      "03:05:19 [INFO] train episode 3756: winner = 0, steps = 8\n",
      "03:05:23 [INFO] train episode 3757: winner = 0, steps = 8\n",
      "03:05:25 [INFO] train episode 3758: winner = 1, steps = 4\n",
      "03:05:30 [INFO] train episode 3759: winner = 0, steps = 8\n",
      "03:05:32 [INFO] train episode 3760: winner = 1, steps = 4\n",
      "03:05:33 [INFO] train episode 3761: winner = 1, steps = 4\n",
      "03:05:36 [INFO] train episode 3762: winner = 0, steps = 8\n",
      "03:05:40 [INFO] train episode 3763: winner = 0, steps = 8\n",
      "03:05:42 [INFO] train episode 3764: winner = 1, steps = 4\n",
      "03:05:43 [INFO] train episode 3765: winner = -1, steps = 5\n",
      "03:05:43 [INFO] train episode 3766: winner = 0, steps = 8\n",
      "03:05:47 [INFO] train episode 3767: winner = 1, steps = 6\n",
      "03:05:49 [INFO] train episode 3768: winner = -1, steps = 7\n",
      "03:05:51 [INFO] train episode 3769: winner = 0, steps = 8\n",
      "03:05:52 [INFO] train episode 3770: winner = -1, steps = 7\n",
      "03:05:54 [INFO] train episode 3771: winner = 0, steps = 8\n",
      "03:05:54 [INFO] train episode 3772: winner = 1, steps = 4\n",
      "03:05:54 [INFO] train episode 3773: winner = -1, steps = 5\n",
      "03:05:57 [INFO] train episode 3774: winner = 0, steps = 8\n",
      "03:06:02 [INFO] train episode 3775: winner = 0, steps = 8\n",
      "03:06:02 [INFO] train episode 3776: winner = 1, steps = 6\n",
      "03:06:05 [INFO] train episode 3777: winner = 1, steps = 4\n",
      "03:06:07 [INFO] train episode 3778: winner = 1, steps = 6\n",
      "03:06:09 [INFO] train episode 3779: winner = 0, steps = 8\n",
      "03:06:10 [INFO] train episode 3780: winner = 1, steps = 6\n",
      "03:06:14 [INFO] train episode 3781: winner = -1, steps = 7\n",
      "03:06:14 [INFO] train episode 3782: winner = 1, steps = 4\n",
      "03:06:17 [INFO] train episode 3783: winner = 0, steps = 8\n",
      "03:06:18 [INFO] train episode 3784: winner = 1, steps = 6\n",
      "03:06:20 [INFO] train episode 3785: winner = 1, steps = 6\n",
      "03:06:22 [INFO] train episode 3786: winner = 1, steps = 4\n",
      "03:06:22 [INFO] train episode 3787: winner = 1, steps = 4\n",
      "03:06:25 [INFO] train episode 3788: winner = 0, steps = 8\n",
      "03:06:27 [INFO] train episode 3789: winner = 0, steps = 8\n",
      "03:06:29 [INFO] train episode 3790: winner = 0, steps = 8\n",
      "03:06:33 [INFO] train episode 3791: winner = 0, steps = 8\n",
      "03:06:36 [INFO] train episode 3792: winner = 1, steps = 6\n",
      "03:06:38 [INFO] train episode 3793: winner = 1, steps = 6\n",
      "03:06:38 [INFO] train episode 3794: winner = 1, steps = 4\n",
      "03:06:39 [INFO] train episode 3795: winner = 1, steps = 6\n",
      "03:06:40 [INFO] train episode 3796: winner = 0, steps = 8\n",
      "03:06:41 [INFO] train episode 3797: winner = 1, steps = 6\n",
      "03:06:42 [INFO] train episode 3798: winner = 0, steps = 8\n",
      "03:06:43 [INFO] train episode 3799: winner = -1, steps = 5\n",
      "03:06:49 [INFO] train episode 3800: winner = 0, steps = 8\n",
      "03:06:50 [INFO] train episode 3801: winner = 1, steps = 6\n",
      "03:06:50 [INFO] train episode 3802: winner = 0, steps = 8\n",
      "03:06:51 [INFO] train episode 3803: winner = 1, steps = 6\n",
      "03:06:51 [INFO] train episode 3804: winner = 1, steps = 6\n",
      "03:06:55 [INFO] train episode 3805: winner = 1, steps = 6\n",
      "03:06:57 [INFO] train episode 3806: winner = 1, steps = 6\n",
      "03:06:58 [INFO] train episode 3807: winner = -1, steps = 5\n",
      "03:06:59 [INFO] train episode 3808: winner = 1, steps = 6\n",
      "03:07:00 [INFO] train episode 3809: winner = 1, steps = 6\n",
      "03:07:01 [INFO] train episode 3810: winner = 0, steps = 8\n",
      "03:07:02 [INFO] train episode 3811: winner = 0, steps = 8\n",
      "03:07:03 [INFO] train episode 3812: winner = 1, steps = 6\n",
      "03:07:03 [INFO] train episode 3813: winner = 1, steps = 6\n",
      "03:07:03 [INFO] train episode 3814: winner = 0, steps = 8\n",
      "03:07:04 [INFO] train episode 3815: winner = 0, steps = 8\n",
      "03:07:05 [INFO] train episode 3816: winner = 1, steps = 4\n",
      "03:07:06 [INFO] train episode 3817: winner = 1, steps = 4\n",
      "03:07:06 [INFO] train episode 3818: winner = 1, steps = 6\n",
      "03:07:06 [INFO] train episode 3819: winner = 1, steps = 6\n",
      "03:07:06 [INFO] train episode 3820: winner = 0, steps = 8\n",
      "03:07:11 [INFO] train episode 3821: winner = 1, steps = 8\n",
      "03:07:12 [INFO] train episode 3822: winner = 1, steps = 6\n",
      "03:07:12 [INFO] train episode 3823: winner = 1, steps = 6\n",
      "03:07:13 [INFO] train episode 3824: winner = -1, steps = 5\n",
      "03:07:14 [INFO] train episode 3825: winner = 1, steps = 4\n",
      "03:07:15 [INFO] train episode 3826: winner = 1, steps = 4\n",
      "03:07:21 [INFO] train episode 3827: winner = 0, steps = 8\n",
      "03:07:22 [INFO] train episode 3828: winner = 1, steps = 4\n",
      "03:07:23 [INFO] train episode 3829: winner = 1, steps = 6\n",
      "03:07:27 [INFO] train episode 3830: winner = 0, steps = 8\n",
      "03:07:29 [INFO] train episode 3831: winner = 0, steps = 8\n",
      "03:07:32 [INFO] train episode 3832: winner = -1, steps = 7\n",
      "03:07:33 [INFO] train episode 3833: winner = 0, steps = 8\n",
      "03:07:33 [INFO] train episode 3834: winner = 1, steps = 6\n",
      "03:07:34 [INFO] train episode 3835: winner = 1, steps = 6\n",
      "03:07:35 [INFO] train episode 3836: winner = 1, steps = 6\n",
      "03:07:35 [INFO] train episode 3837: winner = 1, steps = 4\n",
      "03:07:35 [INFO] train episode 3838: winner = 0, steps = 8\n",
      "03:07:37 [INFO] train episode 3839: winner = -1, steps = 7\n",
      "03:07:43 [INFO] train episode 3840: winner = 0, steps = 8\n",
      "03:07:45 [INFO] train episode 3841: winner = 0, steps = 8\n",
      "03:07:45 [INFO] train episode 3842: winner = 1, steps = 6\n",
      "03:07:45 [INFO] train episode 3843: winner = 1, steps = 4\n",
      "03:07:45 [INFO] train episode 3844: winner = 1, steps = 4\n",
      "03:07:47 [INFO] train episode 3845: winner = 0, steps = 8\n",
      "03:07:50 [INFO] train episode 3846: winner = 0, steps = 8\n",
      "03:07:52 [INFO] train episode 3847: winner = 0, steps = 8\n",
      "03:07:52 [INFO] train episode 3848: winner = 1, steps = 6\n",
      "03:07:52 [INFO] train episode 3849: winner = 1, steps = 4\n",
      "03:07:52 [INFO] train episode 3850: winner = 1, steps = 4\n",
      "03:07:53 [INFO] train episode 3851: winner = 0, steps = 8\n",
      "03:07:55 [INFO] train episode 3852: winner = -1, steps = 5\n",
      "03:07:55 [INFO] train episode 3853: winner = 0, steps = 8\n",
      "03:08:01 [INFO] train episode 3854: winner = 0, steps = 8\n",
      "03:08:02 [INFO] train episode 3855: winner = 1, steps = 6\n",
      "03:08:02 [INFO] train episode 3856: winner = 1, steps = 6\n",
      "03:08:02 [INFO] train episode 3857: winner = 1, steps = 6\n",
      "03:08:03 [INFO] train episode 3858: winner = 0, steps = 8\n",
      "03:08:04 [INFO] train episode 3859: winner = 0, steps = 8\n",
      "03:08:04 [INFO] test episode 3859:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:08:13 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "03:08:21 [INFO] step 1：player -1, action (1, 1)\n",
      "+++\n",
      "+xo\n",
      "+++\n",
      "03:08:26 [INFO] step 2：player 1, action (2, 1)\n",
      "+++\n",
      "+xo\n",
      "+o+\n",
      "03:08:30 [INFO] step 3：player -1, action (2, 2)\n",
      "+++\n",
      "+xo\n",
      "+ox\n",
      "03:08:31 [INFO] step 4：player 1, action (0, 2)\n",
      "++o\n",
      "+xo\n",
      "+ox\n",
      "03:08:32 [INFO] step 5：player -1, action (0, 0)\n",
      "x+o\n",
      "+xo\n",
      "+ox\n",
      "03:08:32 [INFO] test episode 3859: winner = -1, steps = 5\n",
      "03:08:48 [INFO] train episode 3860: winner = 1, steps = 6\n",
      "03:08:58 [INFO] train episode 3861: winner = 1, steps = 4\n",
      "03:09:09 [INFO] train episode 3862: winner = -1, steps = 5\n",
      "03:09:20 [INFO] train episode 3863: winner = 1, steps = 4\n",
      "03:09:22 [INFO] train episode 3864: winner = 1, steps = 4\n",
      "03:09:33 [INFO] train episode 3865: winner = 1, steps = 8\n",
      "03:09:38 [INFO] train episode 3866: winner = 0, steps = 8\n",
      "03:09:40 [INFO] train episode 3867: winner = 1, steps = 6\n",
      "03:09:47 [INFO] train episode 3868: winner = 0, steps = 8\n",
      "03:09:56 [INFO] train episode 3869: winner = 0, steps = 8\n",
      "03:09:59 [INFO] train episode 3870: winner = 1, steps = 6\n",
      "03:10:09 [INFO] train episode 3871: winner = 1, steps = 8\n",
      "03:10:10 [INFO] train episode 3872: winner = 0, steps = 8\n",
      "03:10:18 [INFO] train episode 3873: winner = -1, steps = 5\n",
      "03:10:28 [INFO] train episode 3874: winner = 1, steps = 4\n",
      "03:10:34 [INFO] train episode 3875: winner = 0, steps = 8\n",
      "03:10:36 [INFO] train episode 3876: winner = 1, steps = 6\n",
      "03:10:43 [INFO] train episode 3877: winner = 0, steps = 8\n",
      "03:10:48 [INFO] train episode 3878: winner = 1, steps = 8\n",
      "03:10:50 [INFO] train episode 3879: winner = 1, steps = 6\n",
      "03:10:53 [INFO] train episode 3880: winner = 1, steps = 6\n",
      "03:10:55 [INFO] train episode 3881: winner = 1, steps = 6\n",
      "03:10:57 [INFO] train episode 3882: winner = 1, steps = 6\n",
      "03:11:00 [INFO] train episode 3883: winner = 1, steps = 6\n",
      "03:11:00 [INFO] train episode 3884: winner = 0, steps = 8\n",
      "03:11:02 [INFO] train episode 3885: winner = -1, steps = 5\n",
      "03:11:03 [INFO] train episode 3886: winner = 1, steps = 4\n",
      "03:11:04 [INFO] train episode 3887: winner = 1, steps = 4\n",
      "03:11:06 [INFO] train episode 3888: winner = 0, steps = 8\n",
      "03:11:07 [INFO] train episode 3889: winner = 1, steps = 8\n",
      "03:11:09 [INFO] train episode 3890: winner = 0, steps = 8\n",
      "03:11:10 [INFO] train episode 3891: winner = 1, steps = 4\n",
      "03:11:13 [INFO] train episode 3892: winner = 1, steps = 4\n",
      "03:11:13 [INFO] train episode 3893: winner = 1, steps = 6\n",
      "03:11:13 [INFO] train episode 3894: winner = 1, steps = 6\n",
      "03:11:15 [INFO] train episode 3895: winner = 0, steps = 8\n",
      "03:11:15 [INFO] train episode 3896: winner = 1, steps = 6\n",
      "03:11:16 [INFO] train episode 3897: winner = 1, steps = 4\n",
      "03:11:20 [INFO] train episode 3898: winner = 0, steps = 8\n",
      "03:11:23 [INFO] train episode 3899: winner = 1, steps = 4\n",
      "03:11:23 [INFO] train episode 3900: winner = 0, steps = 8\n",
      "03:11:23 [INFO] train episode 3901: winner = 1, steps = 4\n",
      "03:11:24 [INFO] train episode 3902: winner = 1, steps = 6\n",
      "03:11:26 [INFO] train episode 3903: winner = -1, steps = 5\n",
      "03:11:26 [INFO] train episode 3904: winner = 1, steps = 4\n",
      "03:11:27 [INFO] train episode 3905: winner = 0, steps = 8\n",
      "03:11:31 [INFO] train episode 3906: winner = 1, steps = 4\n",
      "03:11:34 [INFO] train episode 3907: winner = 0, steps = 8\n",
      "03:11:37 [INFO] train episode 3908: winner = 1, steps = 6\n",
      "03:11:38 [INFO] train episode 3909: winner = 1, steps = 6\n",
      "03:11:41 [INFO] train episode 3910: winner = -1, steps = 5\n",
      "03:11:45 [INFO] train episode 3911: winner = 0, steps = 8\n",
      "03:11:46 [INFO] train episode 3912: winner = 0, steps = 8\n",
      "03:11:49 [INFO] train episode 3913: winner = 1, steps = 4\n",
      "03:11:53 [INFO] train episode 3914: winner = 1, steps = 8\n",
      "03:11:55 [INFO] train episode 3915: winner = 0, steps = 8\n",
      "03:11:58 [INFO] train episode 3916: winner = 0, steps = 8\n",
      "03:11:59 [INFO] train episode 3917: winner = 0, steps = 8\n",
      "03:12:02 [INFO] train episode 3918: winner = 1, steps = 4\n",
      "03:12:03 [INFO] train episode 3919: winner = 1, steps = 6\n",
      "03:12:03 [INFO] train episode 3920: winner = 0, steps = 8\n",
      "03:12:04 [INFO] train episode 3921: winner = 0, steps = 8\n",
      "03:12:05 [INFO] train episode 3922: winner = 1, steps = 4\n",
      "03:12:05 [INFO] train episode 3923: winner = 0, steps = 8\n",
      "03:12:07 [INFO] train episode 3924: winner = -1, steps = 5\n",
      "03:12:09 [INFO] train episode 3925: winner = 1, steps = 4\n",
      "03:12:13 [INFO] train episode 3926: winner = 1, steps = 8\n",
      "03:12:15 [INFO] train episode 3927: winner = -1, steps = 7\n",
      "03:12:15 [INFO] train episode 3928: winner = -1, steps = 5\n",
      "03:12:22 [INFO] train episode 3929: winner = 1, steps = 8\n",
      "03:12:24 [INFO] train episode 3930: winner = 1, steps = 4\n",
      "03:12:24 [INFO] train episode 3931: winner = 1, steps = 6\n",
      "03:12:25 [INFO] train episode 3932: winner = 0, steps = 8\n",
      "03:12:26 [INFO] train episode 3933: winner = 0, steps = 8\n",
      "03:12:28 [INFO] train episode 3934: winner = 0, steps = 8\n",
      "03:12:29 [INFO] train episode 3935: winner = 1, steps = 4\n",
      "03:12:34 [INFO] train episode 3936: winner = 0, steps = 8\n",
      "03:12:36 [INFO] train episode 3937: winner = 0, steps = 8\n",
      "03:12:36 [INFO] train episode 3938: winner = 1, steps = 4\n",
      "03:12:37 [INFO] train episode 3939: winner = 0, steps = 8\n",
      "03:12:44 [INFO] train episode 3940: winner = 0, steps = 8\n",
      "03:12:44 [INFO] train episode 3941: winner = 0, steps = 8\n",
      "03:12:44 [INFO] train episode 3942: winner = 0, steps = 8\n",
      "03:12:46 [INFO] train episode 3943: winner = -1, steps = 7\n",
      "03:12:47 [INFO] train episode 3944: winner = 1, steps = 6\n",
      "03:12:48 [INFO] train episode 3945: winner = 1, steps = 6\n",
      "03:12:50 [INFO] train episode 3946: winner = -1, steps = 5\n",
      "03:12:50 [INFO] train episode 3947: winner = 1, steps = 6\n",
      "03:12:50 [INFO] train episode 3948: winner = 1, steps = 6\n",
      "03:12:50 [INFO] train episode 3949: winner = 0, steps = 8\n",
      "03:12:50 [INFO] train episode 3950: winner = 0, steps = 8\n",
      "03:12:53 [INFO] train episode 3951: winner = 0, steps = 8\n",
      "03:12:54 [INFO] train episode 3952: winner = 0, steps = 8\n",
      "03:12:54 [INFO] train episode 3953: winner = 0, steps = 8\n",
      "03:12:54 [INFO] train episode 3954: winner = 1, steps = 4\n",
      "03:12:54 [INFO] train episode 3955: winner = 0, steps = 8\n",
      "03:13:03 [INFO] train episode 3956: winner = 0, steps = 8\n",
      "03:13:04 [INFO] train episode 3957: winner = 1, steps = 6\n",
      "03:13:04 [INFO] train episode 3958: winner = -1, steps = 5\n",
      "03:13:05 [INFO] train episode 3959: winner = 0, steps = 8\n",
      "03:13:05 [INFO] train episode 3960: winner = 0, steps = 8\n",
      "03:13:10 [INFO] train episode 3961: winner = 0, steps = 8\n",
      "03:13:10 [INFO] train episode 3962: winner = 0, steps = 8\n",
      "03:13:11 [INFO] train episode 3963: winner = 0, steps = 8\n",
      "03:13:11 [INFO] train episode 3964: winner = 0, steps = 8\n",
      "03:13:13 [INFO] train episode 3965: winner = 0, steps = 8\n",
      "03:13:16 [INFO] train episode 3966: winner = 1, steps = 6\n",
      "03:13:17 [INFO] train episode 3967: winner = 0, steps = 8\n",
      "03:13:17 [INFO] train episode 3968: winner = 1, steps = 4\n",
      "03:13:17 [INFO] train episode 3969: winner = 0, steps = 8\n",
      "03:13:24 [INFO] train episode 3970: winner = 0, steps = 8\n",
      "03:13:26 [INFO] train episode 3971: winner = 1, steps = 6\n",
      "03:13:26 [INFO] train episode 3972: winner = 0, steps = 8\n",
      "03:13:27 [INFO] train episode 3973: winner = 0, steps = 8\n",
      "03:13:28 [INFO] train episode 3974: winner = -1, steps = 7\n",
      "03:13:28 [INFO] train episode 3975: winner = -1, steps = 5\n",
      "03:13:29 [INFO] train episode 3976: winner = 0, steps = 8\n",
      "03:13:29 [INFO] train episode 3977: winner = 1, steps = 6\n",
      "03:13:30 [INFO] train episode 3978: winner = 0, steps = 8\n",
      "03:13:30 [INFO] train episode 3979: winner = 1, steps = 6\n",
      "03:13:32 [INFO] train episode 3980: winner = 1, steps = 6\n",
      "03:13:32 [INFO] train episode 3981: winner = 0, steps = 8\n",
      "03:13:33 [INFO] train episode 3982: winner = 1, steps = 4\n",
      "03:13:33 [INFO] train episode 3983: winner = 0, steps = 8\n",
      "03:13:36 [INFO] train episode 3984: winner = 1, steps = 6\n",
      "03:13:37 [INFO] train episode 3985: winner = 0, steps = 8\n",
      "03:13:39 [INFO] train episode 3986: winner = 1, steps = 6\n",
      "03:13:40 [INFO] train episode 3987: winner = 1, steps = 4\n",
      "03:13:41 [INFO] train episode 3988: winner = 1, steps = 6\n",
      "03:13:41 [INFO] train episode 3989: winner = 0, steps = 8\n",
      "03:13:42 [INFO] train episode 3990: winner = -1, steps = 5\n",
      "03:13:45 [INFO] train episode 3991: winner = -1, steps = 7\n",
      "03:13:47 [INFO] train episode 3992: winner = 1, steps = 6\n",
      "03:13:47 [INFO] test episode 3992:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:13:56 [INFO] step 0：player 1, action (1, 0)\n",
      "+++\n",
      "o++\n",
      "+++\n",
      "03:14:03 [INFO] step 1：player -1, action (2, 0)\n",
      "+++\n",
      "o++\n",
      "x++\n",
      "03:14:09 [INFO] step 2：player 1, action (2, 1)\n",
      "+++\n",
      "o++\n",
      "xo+\n",
      "03:14:12 [INFO] step 3：player -1, action (1, 2)\n",
      "+++\n",
      "o+x\n",
      "xo+\n",
      "03:14:15 [INFO] step 4：player 1, action (0, 2)\n",
      "++o\n",
      "o+x\n",
      "xo+\n",
      "03:14:16 [INFO] step 5：player -1, action (0, 0)\n",
      "x+o\n",
      "o+x\n",
      "xo+\n",
      "03:14:16 [INFO] step 6：player 1, action (1, 1)\n",
      "x+o\n",
      "oox\n",
      "xo+\n",
      "03:14:17 [INFO] step 7：player -1, action (0, 1)\n",
      "xxo\n",
      "oox\n",
      "xo+\n",
      "03:14:17 [INFO] step 8：player 1, action (2, 2)\n",
      "xxo\n",
      "oox\n",
      "xoo\n",
      "03:14:17 [INFO] test episode 3992: winner = 0, steps = 8\n",
      "03:14:29 [INFO] train episode 3993: winner = 1, steps = 6\n",
      "03:14:46 [INFO] train episode 3994: winner = -1, steps = 7\n",
      "03:14:56 [INFO] train episode 3995: winner = 1, steps = 6\n",
      "03:15:07 [INFO] train episode 3996: winner = -1, steps = 5\n",
      "03:15:10 [INFO] train episode 3997: winner = 1, steps = 4\n",
      "03:15:18 [INFO] train episode 3998: winner = 1, steps = 6\n",
      "03:15:20 [INFO] train episode 3999: winner = 1, steps = 4\n",
      "03:15:29 [INFO] train episode 4000: winner = 0, steps = 8\n",
      "03:15:32 [INFO] train episode 4001: winner = 1, steps = 6\n",
      "03:15:43 [INFO] train episode 4002: winner = 0, steps = 8\n",
      "03:15:51 [INFO] train episode 4003: winner = 1, steps = 6\n",
      "03:15:59 [INFO] train episode 4004: winner = 0, steps = 8\n",
      "03:16:04 [INFO] train episode 4005: winner = 1, steps = 6\n",
      "03:16:05 [INFO] train episode 4006: winner = 1, steps = 4\n",
      "03:16:07 [INFO] train episode 4007: winner = 0, steps = 8\n",
      "03:16:08 [INFO] train episode 4008: winner = 0, steps = 8\n",
      "03:16:17 [INFO] train episode 4009: winner = 0, steps = 8\n",
      "03:16:22 [INFO] train episode 4010: winner = 1, steps = 6\n",
      "03:16:24 [INFO] train episode 4011: winner = 1, steps = 6\n",
      "03:16:24 [INFO] train episode 4012: winner = -1, steps = 5\n",
      "03:16:32 [INFO] train episode 4013: winner = 1, steps = 6\n",
      "03:16:32 [INFO] train episode 4014: winner = 1, steps = 4\n",
      "03:16:36 [INFO] train episode 4015: winner = 0, steps = 8\n",
      "03:16:41 [INFO] train episode 4016: winner = 0, steps = 8\n",
      "03:16:43 [INFO] train episode 4017: winner = 1, steps = 8\n",
      "03:16:47 [INFO] train episode 4018: winner = -1, steps = 5\n",
      "03:16:53 [INFO] train episode 4019: winner = 0, steps = 8\n",
      "03:16:53 [INFO] train episode 4020: winner = 1, steps = 4\n",
      "03:16:55 [INFO] train episode 4021: winner = 1, steps = 4\n",
      "03:17:03 [INFO] train episode 4022: winner = 1, steps = 4\n",
      "03:17:05 [INFO] train episode 4023: winner = 1, steps = 6\n",
      "03:17:08 [INFO] train episode 4024: winner = 1, steps = 6\n",
      "03:17:09 [INFO] train episode 4025: winner = 1, steps = 6\n",
      "03:17:10 [INFO] train episode 4026: winner = 0, steps = 8\n",
      "03:17:17 [INFO] train episode 4027: winner = 1, steps = 6\n",
      "03:17:26 [INFO] train episode 4028: winner = -1, steps = 7\n",
      "03:17:29 [INFO] train episode 4029: winner = 1, steps = 6\n",
      "03:17:32 [INFO] train episode 4030: winner = -1, steps = 5\n",
      "03:17:33 [INFO] train episode 4031: winner = 0, steps = 8\n",
      "03:17:34 [INFO] train episode 4032: winner = 0, steps = 8\n",
      "03:17:35 [INFO] train episode 4033: winner = 0, steps = 8\n",
      "03:17:38 [INFO] train episode 4034: winner = 1, steps = 4\n",
      "03:17:39 [INFO] train episode 4035: winner = -1, steps = 7\n",
      "03:17:39 [INFO] train episode 4036: winner = 1, steps = 4\n",
      "03:17:41 [INFO] train episode 4037: winner = 1, steps = 4\n",
      "03:17:43 [INFO] train episode 4038: winner = 1, steps = 4\n",
      "03:17:45 [INFO] train episode 4039: winner = 1, steps = 6\n",
      "03:17:47 [INFO] train episode 4040: winner = 0, steps = 8\n",
      "03:17:48 [INFO] train episode 4041: winner = 0, steps = 8\n",
      "03:17:48 [INFO] train episode 4042: winner = -1, steps = 5\n",
      "03:17:51 [INFO] train episode 4043: winner = 1, steps = 4\n",
      "03:17:54 [INFO] train episode 4044: winner = 1, steps = 6\n",
      "03:17:56 [INFO] train episode 4045: winner = 1, steps = 6\n",
      "03:17:57 [INFO] train episode 4046: winner = 0, steps = 8\n",
      "03:17:58 [INFO] train episode 4047: winner = 0, steps = 8\n",
      "03:18:00 [INFO] train episode 4048: winner = 0, steps = 8\n",
      "03:18:00 [INFO] train episode 4049: winner = 1, steps = 4\n",
      "03:18:02 [INFO] train episode 4050: winner = 1, steps = 6\n",
      "03:18:04 [INFO] train episode 4051: winner = 0, steps = 8\n",
      "03:18:05 [INFO] train episode 4052: winner = 0, steps = 8\n",
      "03:18:08 [INFO] train episode 4053: winner = 1, steps = 4\n",
      "03:18:11 [INFO] train episode 4054: winner = 0, steps = 8\n",
      "03:18:13 [INFO] train episode 4055: winner = 1, steps = 6\n",
      "03:18:17 [INFO] train episode 4056: winner = 0, steps = 8\n",
      "03:18:19 [INFO] train episode 4057: winner = 0, steps = 8\n",
      "03:18:21 [INFO] train episode 4058: winner = 0, steps = 8\n",
      "03:18:22 [INFO] train episode 4059: winner = 1, steps = 6\n",
      "03:18:24 [INFO] train episode 4060: winner = -1, steps = 7\n",
      "03:18:28 [INFO] train episode 4061: winner = 0, steps = 8\n",
      "03:18:31 [INFO] train episode 4062: winner = 0, steps = 8\n",
      "03:18:34 [INFO] train episode 4063: winner = 0, steps = 8\n",
      "03:18:38 [INFO] train episode 4064: winner = -1, steps = 5\n",
      "03:18:38 [INFO] train episode 4065: winner = 0, steps = 8\n",
      "03:18:40 [INFO] train episode 4066: winner = 1, steps = 4\n",
      "03:18:40 [INFO] train episode 4067: winner = 0, steps = 8\n",
      "03:18:41 [INFO] train episode 4068: winner = 0, steps = 8\n",
      "03:18:41 [INFO] train episode 4069: winner = -1, steps = 7\n",
      "03:18:44 [INFO] train episode 4070: winner = 1, steps = 6\n",
      "03:18:44 [INFO] train episode 4071: winner = -1, steps = 5\n",
      "03:18:44 [INFO] train episode 4072: winner = 1, steps = 4\n",
      "03:18:44 [INFO] train episode 4073: winner = -1, steps = 5\n",
      "03:18:45 [INFO] train episode 4074: winner = -1, steps = 5\n",
      "03:18:47 [INFO] train episode 4075: winner = 0, steps = 8\n",
      "03:18:48 [INFO] train episode 4076: winner = 0, steps = 8\n",
      "03:18:51 [INFO] train episode 4077: winner = 1, steps = 6\n",
      "03:18:53 [INFO] train episode 4078: winner = 0, steps = 8\n",
      "03:18:54 [INFO] train episode 4079: winner = 1, steps = 6\n",
      "03:18:54 [INFO] train episode 4080: winner = 1, steps = 6\n",
      "03:18:56 [INFO] train episode 4081: winner = -1, steps = 5\n",
      "03:18:56 [INFO] train episode 4082: winner = 1, steps = 6\n",
      "03:18:56 [INFO] train episode 4083: winner = 0, steps = 8\n",
      "03:18:59 [INFO] train episode 4084: winner = 1, steps = 6\n",
      "03:18:59 [INFO] train episode 4085: winner = 1, steps = 6\n",
      "03:19:03 [INFO] train episode 4086: winner = -1, steps = 5\n",
      "03:19:04 [INFO] train episode 4087: winner = 0, steps = 8\n",
      "03:19:07 [INFO] train episode 4088: winner = 1, steps = 6\n",
      "03:19:09 [INFO] train episode 4089: winner = 1, steps = 6\n",
      "03:19:10 [INFO] train episode 4090: winner = 1, steps = 6\n",
      "03:19:11 [INFO] train episode 4091: winner = 1, steps = 6\n",
      "03:19:12 [INFO] train episode 4092: winner = 0, steps = 8\n",
      "03:19:14 [INFO] train episode 4093: winner = 1, steps = 6\n",
      "03:19:14 [INFO] train episode 4094: winner = 1, steps = 6\n",
      "03:19:18 [INFO] train episode 4095: winner = 0, steps = 8\n",
      "03:19:20 [INFO] train episode 4096: winner = 0, steps = 8\n",
      "03:19:21 [INFO] train episode 4097: winner = -1, steps = 7\n",
      "03:19:24 [INFO] train episode 4098: winner = 0, steps = 8\n",
      "03:19:27 [INFO] train episode 4099: winner = 0, steps = 8\n",
      "03:19:28 [INFO] train episode 4100: winner = 1, steps = 4\n",
      "03:19:28 [INFO] train episode 4101: winner = 1, steps = 4\n",
      "03:19:29 [INFO] train episode 4102: winner = 1, steps = 6\n",
      "03:19:31 [INFO] train episode 4103: winner = 1, steps = 6\n",
      "03:19:32 [INFO] train episode 4104: winner = 0, steps = 8\n",
      "03:19:32 [INFO] train episode 4105: winner = 1, steps = 4\n",
      "03:19:32 [INFO] train episode 4106: winner = 0, steps = 8\n",
      "03:19:33 [INFO] train episode 4107: winner = 1, steps = 6\n",
      "03:19:35 [INFO] train episode 4108: winner = 0, steps = 8\n",
      "03:19:36 [INFO] train episode 4109: winner = 1, steps = 8\n",
      "03:19:38 [INFO] train episode 4110: winner = 0, steps = 8\n",
      "03:19:39 [INFO] train episode 4111: winner = 0, steps = 8\n",
      "03:19:40 [INFO] train episode 4112: winner = 1, steps = 4\n",
      "03:19:40 [INFO] train episode 4113: winner = 1, steps = 6\n",
      "03:19:42 [INFO] train episode 4114: winner = 1, steps = 6\n",
      "03:19:44 [INFO] train episode 4115: winner = 1, steps = 6\n",
      "03:19:44 [INFO] train episode 4116: winner = 1, steps = 6\n",
      "03:19:44 [INFO] train episode 4117: winner = 1, steps = 6\n",
      "03:19:45 [INFO] train episode 4118: winner = 1, steps = 4\n",
      "03:19:46 [INFO] train episode 4119: winner = 1, steps = 6\n",
      "03:19:47 [INFO] train episode 4120: winner = 1, steps = 4\n",
      "03:19:49 [INFO] train episode 4121: winner = 1, steps = 6\n",
      "03:19:49 [INFO] train episode 4122: winner = 1, steps = 6\n",
      "03:19:49 [INFO] train episode 4123: winner = 1, steps = 4\n",
      "03:19:49 [INFO] train episode 4124: winner = 0, steps = 8\n",
      "03:19:51 [INFO] train episode 4125: winner = -1, steps = 7\n",
      "03:19:51 [INFO] train episode 4126: winner = 0, steps = 8\n",
      "03:19:52 [INFO] train episode 4127: winner = 1, steps = 4\n",
      "03:19:53 [INFO] train episode 4128: winner = 1, steps = 8\n",
      "03:19:53 [INFO] test episode 4128:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:20:02 [INFO] step 0：player 1, action (1, 0)\n",
      "+++\n",
      "o++\n",
      "+++\n",
      "03:20:10 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "o++\n",
      "+++\n",
      "03:20:14 [INFO] step 2：player 1, action (1, 1)\n",
      "++x\n",
      "oo+\n",
      "+++\n",
      "03:20:15 [INFO] step 3：player -1, action (0, 0)\n",
      "x+x\n",
      "oo+\n",
      "+++\n",
      "03:20:16 [INFO] step 4：player 1, action (1, 2)\n",
      "x+x\n",
      "ooo\n",
      "+++\n",
      "03:20:16 [INFO] test episode 4128: winner = 1, steps = 4\n",
      "03:20:31 [INFO] train episode 4129: winner = 1, steps = 6\n",
      "03:20:41 [INFO] train episode 4130: winner = 1, steps = 6\n",
      "03:20:54 [INFO] train episode 4131: winner = 1, steps = 4\n",
      "03:21:01 [INFO] train episode 4132: winner = 1, steps = 6\n",
      "03:21:06 [INFO] train episode 4133: winner = 0, steps = 8\n",
      "03:21:09 [INFO] train episode 4134: winner = 1, steps = 6\n",
      "03:21:12 [INFO] train episode 4135: winner = 1, steps = 6\n",
      "03:21:24 [INFO] train episode 4136: winner = 0, steps = 8\n",
      "03:21:24 [INFO] train episode 4137: winner = 1, steps = 6\n",
      "03:21:32 [INFO] train episode 4138: winner = -1, steps = 7\n",
      "03:21:34 [INFO] train episode 4139: winner = 1, steps = 6\n",
      "03:21:34 [INFO] train episode 4140: winner = 1, steps = 6\n",
      "03:21:37 [INFO] train episode 4141: winner = 0, steps = 8\n",
      "03:21:47 [INFO] train episode 4142: winner = 0, steps = 8\n",
      "03:21:56 [INFO] train episode 4143: winner = 0, steps = 8\n",
      "03:22:00 [INFO] train episode 4144: winner = 0, steps = 8\n",
      "03:22:07 [INFO] train episode 4145: winner = 1, steps = 4\n",
      "03:22:13 [INFO] train episode 4146: winner = -1, steps = 7\n",
      "03:22:22 [INFO] train episode 4147: winner = 1, steps = 4\n",
      "03:22:33 [INFO] train episode 4148: winner = 1, steps = 6\n",
      "03:22:35 [INFO] train episode 4149: winner = 1, steps = 6\n",
      "03:22:38 [INFO] train episode 4150: winner = 1, steps = 6\n",
      "03:22:41 [INFO] train episode 4151: winner = 1, steps = 4\n",
      "03:22:42 [INFO] train episode 4152: winner = 1, steps = 4\n",
      "03:22:44 [INFO] train episode 4153: winner = 1, steps = 4\n",
      "03:22:46 [INFO] train episode 4154: winner = 1, steps = 6\n",
      "03:22:52 [INFO] train episode 4155: winner = 1, steps = 6\n",
      "03:22:54 [INFO] train episode 4156: winner = 0, steps = 8\n",
      "03:22:55 [INFO] train episode 4157: winner = 1, steps = 4\n",
      "03:22:55 [INFO] train episode 4158: winner = 1, steps = 6\n",
      "03:22:58 [INFO] train episode 4159: winner = 1, steps = 4\n",
      "03:22:58 [INFO] train episode 4160: winner = 1, steps = 4\n",
      "03:23:01 [INFO] train episode 4161: winner = 1, steps = 4\n",
      "03:23:02 [INFO] train episode 4162: winner = 1, steps = 6\n",
      "03:23:02 [INFO] train episode 4163: winner = 0, steps = 8\n",
      "03:23:13 [INFO] train episode 4164: winner = 0, steps = 8\n",
      "03:23:17 [INFO] train episode 4165: winner = 0, steps = 8\n",
      "03:23:21 [INFO] train episode 4166: winner = 0, steps = 8\n",
      "03:23:23 [INFO] train episode 4167: winner = -1, steps = 7\n",
      "03:23:26 [INFO] train episode 4168: winner = 1, steps = 6\n",
      "03:23:28 [INFO] train episode 4169: winner = 1, steps = 6\n",
      "03:23:28 [INFO] train episode 4170: winner = 0, steps = 8\n",
      "03:23:28 [INFO] train episode 4171: winner = 1, steps = 4\n",
      "03:23:31 [INFO] train episode 4172: winner = 0, steps = 8\n",
      "03:23:32 [INFO] train episode 4173: winner = 1, steps = 4\n",
      "03:23:35 [INFO] train episode 4174: winner = 1, steps = 6\n",
      "03:23:39 [INFO] train episode 4175: winner = 0, steps = 8\n",
      "03:23:39 [INFO] train episode 4176: winner = 1, steps = 4\n",
      "03:23:41 [INFO] train episode 4177: winner = 1, steps = 6\n",
      "03:23:46 [INFO] train episode 4178: winner = 0, steps = 8\n",
      "03:23:46 [INFO] train episode 4179: winner = 0, steps = 8\n",
      "03:23:46 [INFO] train episode 4180: winner = 1, steps = 6\n",
      "03:23:47 [INFO] train episode 4181: winner = 0, steps = 8\n",
      "03:23:50 [INFO] train episode 4182: winner = 1, steps = 6\n",
      "03:23:54 [INFO] train episode 4183: winner = 0, steps = 8\n",
      "03:23:56 [INFO] train episode 4184: winner = 1, steps = 6\n",
      "03:23:57 [INFO] train episode 4185: winner = 1, steps = 6\n",
      "03:23:59 [INFO] train episode 4186: winner = 1, steps = 4\n",
      "03:24:01 [INFO] train episode 4187: winner = 1, steps = 6\n",
      "03:24:03 [INFO] train episode 4188: winner = 1, steps = 6\n",
      "03:24:04 [INFO] train episode 4189: winner = 0, steps = 8\n",
      "03:24:06 [INFO] train episode 4190: winner = 1, steps = 6\n",
      "03:24:08 [INFO] train episode 4191: winner = 1, steps = 6\n",
      "03:24:09 [INFO] train episode 4192: winner = -1, steps = 5\n",
      "03:24:13 [INFO] train episode 4193: winner = 0, steps = 8\n",
      "03:24:16 [INFO] train episode 4194: winner = 1, steps = 8\n",
      "03:24:17 [INFO] train episode 4195: winner = -1, steps = 5\n",
      "03:24:22 [INFO] train episode 4196: winner = 0, steps = 8\n",
      "03:24:24 [INFO] train episode 4197: winner = 1, steps = 6\n",
      "03:24:24 [INFO] train episode 4198: winner = 1, steps = 6\n",
      "03:24:24 [INFO] train episode 4199: winner = 1, steps = 4\n",
      "03:24:26 [INFO] train episode 4200: winner = 1, steps = 6\n",
      "03:24:32 [INFO] train episode 4201: winner = 0, steps = 8\n",
      "03:24:38 [INFO] train episode 4202: winner = 0, steps = 8\n",
      "03:24:38 [INFO] train episode 4203: winner = 0, steps = 8\n",
      "03:24:38 [INFO] train episode 4204: winner = 1, steps = 6\n",
      "03:24:39 [INFO] train episode 4205: winner = 1, steps = 4\n",
      "03:24:42 [INFO] train episode 4206: winner = 1, steps = 8\n",
      "03:24:44 [INFO] train episode 4207: winner = 1, steps = 6\n",
      "03:24:45 [INFO] train episode 4208: winner = 1, steps = 6\n",
      "03:24:45 [INFO] train episode 4209: winner = 1, steps = 4\n",
      "03:24:46 [INFO] train episode 4210: winner = 1, steps = 4\n",
      "03:24:47 [INFO] train episode 4211: winner = 0, steps = 8\n",
      "03:24:49 [INFO] train episode 4212: winner = 0, steps = 8\n",
      "03:24:49 [INFO] train episode 4213: winner = -1, steps = 5\n",
      "03:24:51 [INFO] train episode 4214: winner = 1, steps = 4\n",
      "03:24:51 [INFO] train episode 4215: winner = 1, steps = 4\n",
      "03:24:51 [INFO] train episode 4216: winner = 1, steps = 6\n",
      "03:24:51 [INFO] train episode 4217: winner = 0, steps = 8\n",
      "03:24:54 [INFO] train episode 4218: winner = 1, steps = 6\n",
      "03:24:54 [INFO] train episode 4219: winner = 0, steps = 8\n",
      "03:24:54 [INFO] train episode 4220: winner = 1, steps = 6\n",
      "03:24:56 [INFO] train episode 4221: winner = 0, steps = 8\n",
      "03:25:03 [INFO] train episode 4222: winner = 1, steps = 8\n",
      "03:25:04 [INFO] train episode 4223: winner = -1, steps = 7\n",
      "03:25:04 [INFO] train episode 4224: winner = 1, steps = 4\n",
      "03:25:05 [INFO] train episode 4225: winner = 1, steps = 4\n",
      "03:25:06 [INFO] train episode 4226: winner = 0, steps = 8\n",
      "03:25:10 [INFO] train episode 4227: winner = 0, steps = 8\n",
      "03:25:13 [INFO] train episode 4228: winner = 1, steps = 8\n",
      "03:25:15 [INFO] train episode 4229: winner = -1, steps = 5\n",
      "03:25:15 [INFO] train episode 4230: winner = 1, steps = 6\n",
      "03:25:16 [INFO] train episode 4231: winner = 1, steps = 6\n",
      "03:25:17 [INFO] train episode 4232: winner = 0, steps = 8\n",
      "03:25:19 [INFO] train episode 4233: winner = -1, steps = 5\n",
      "03:25:19 [INFO] train episode 4234: winner = 0, steps = 8\n",
      "03:25:20 [INFO] train episode 4235: winner = 1, steps = 4\n",
      "03:25:21 [INFO] train episode 4236: winner = 1, steps = 6\n",
      "03:25:22 [INFO] train episode 4237: winner = 1, steps = 6\n",
      "03:25:23 [INFO] train episode 4238: winner = 1, steps = 4\n",
      "03:25:23 [INFO] train episode 4239: winner = -1, steps = 5\n",
      "03:25:24 [INFO] train episode 4240: winner = 0, steps = 8\n",
      "03:25:25 [INFO] train episode 4241: winner = 1, steps = 6\n",
      "03:25:26 [INFO] train episode 4242: winner = 1, steps = 6\n",
      "03:25:28 [INFO] train episode 4243: winner = 1, steps = 6\n",
      "03:25:28 [INFO] train episode 4244: winner = 0, steps = 8\n",
      "03:25:29 [INFO] train episode 4245: winner = 1, steps = 8\n",
      "03:25:31 [INFO] train episode 4246: winner = 0, steps = 8\n",
      "03:25:34 [INFO] train episode 4247: winner = 1, steps = 6\n",
      "03:25:35 [INFO] train episode 4248: winner = 1, steps = 6\n",
      "03:25:35 [INFO] train episode 4249: winner = 0, steps = 8\n",
      "03:25:36 [INFO] train episode 4250: winner = -1, steps = 5\n",
      "03:25:36 [INFO] train episode 4251: winner = 1, steps = 6\n",
      "03:25:38 [INFO] train episode 4252: winner = 1, steps = 4\n",
      "03:25:38 [INFO] train episode 4253: winner = 1, steps = 4\n",
      "03:25:39 [INFO] train episode 4254: winner = 1, steps = 4\n",
      "03:25:42 [INFO] train episode 4255: winner = 1, steps = 4\n",
      "03:25:42 [INFO] train episode 4256: winner = 0, steps = 8\n",
      "03:25:42 [INFO] train episode 4257: winner = 1, steps = 4\n",
      "03:25:44 [INFO] train episode 4258: winner = 0, steps = 8\n",
      "03:25:44 [INFO] train episode 4259: winner = 1, steps = 4\n",
      "03:25:45 [INFO] train episode 4260: winner = 1, steps = 6\n",
      "03:25:46 [INFO] train episode 4261: winner = 1, steps = 4\n",
      "03:25:46 [INFO] train episode 4262: winner = 1, steps = 6\n",
      "03:25:46 [INFO] train episode 4263: winner = 0, steps = 8\n",
      "03:25:49 [INFO] train episode 4264: winner = 1, steps = 6\n",
      "03:25:53 [INFO] train episode 4265: winner = 0, steps = 8\n",
      "03:25:57 [INFO] train episode 4266: winner = 1, steps = 8\n",
      "03:25:58 [INFO] train episode 4267: winner = -1, steps = 7\n",
      "03:25:58 [INFO] test episode 4267:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:26:07 [INFO] step 0：player 1, action (0, 1)\n",
      "+o+\n",
      "+++\n",
      "+++\n",
      "03:26:14 [INFO] step 1：player -1, action (0, 0)\n",
      "xo+\n",
      "+++\n",
      "+++\n",
      "03:26:21 [INFO] step 2：player 1, action (1, 1)\n",
      "xo+\n",
      "+o+\n",
      "+++\n",
      "03:26:22 [INFO] step 3：player -1, action (2, 2)\n",
      "xo+\n",
      "+o+\n",
      "++x\n",
      "03:26:23 [INFO] step 4：player 1, action (1, 2)\n",
      "xo+\n",
      "+oo\n",
      "++x\n",
      "03:26:24 [INFO] step 5：player -1, action (2, 0)\n",
      "xo+\n",
      "+oo\n",
      "x+x\n",
      "03:26:24 [INFO] step 6：player 1, action (2, 1)\n",
      "xo+\n",
      "+oo\n",
      "xox\n",
      "03:26:24 [INFO] test episode 4267: winner = 1, steps = 6\n",
      "03:26:33 [INFO] train episode 4268: winner = 1, steps = 4\n",
      "03:26:45 [INFO] train episode 4269: winner = 1, steps = 6\n",
      "03:26:59 [INFO] train episode 4270: winner = 0, steps = 8\n",
      "03:27:12 [INFO] train episode 4271: winner = 0, steps = 8\n",
      "03:27:17 [INFO] train episode 4272: winner = 0, steps = 8\n",
      "03:27:19 [INFO] train episode 4273: winner = 1, steps = 4\n",
      "03:27:22 [INFO] train episode 4274: winner = 0, steps = 8\n",
      "03:27:25 [INFO] train episode 4275: winner = 1, steps = 4\n",
      "03:27:35 [INFO] train episode 4276: winner = 0, steps = 8\n",
      "03:27:41 [INFO] train episode 4277: winner = 0, steps = 8\n",
      "03:27:45 [INFO] train episode 4278: winner = 1, steps = 6\n",
      "03:27:48 [INFO] train episode 4279: winner = 1, steps = 6\n",
      "03:27:50 [INFO] train episode 4280: winner = 1, steps = 4\n",
      "03:27:54 [INFO] train episode 4281: winner = 1, steps = 6\n",
      "03:27:55 [INFO] train episode 4282: winner = 1, steps = 4\n",
      "03:27:57 [INFO] train episode 4283: winner = 0, steps = 8\n",
      "03:28:01 [INFO] train episode 4284: winner = 1, steps = 6\n",
      "03:28:01 [INFO] train episode 4285: winner = 1, steps = 4\n",
      "03:28:11 [INFO] train episode 4286: winner = -1, steps = 7\n",
      "03:28:11 [INFO] train episode 4287: winner = 1, steps = 4\n",
      "03:28:23 [INFO] train episode 4288: winner = 0, steps = 8\n",
      "03:28:24 [INFO] train episode 4289: winner = 1, steps = 6\n",
      "03:28:27 [INFO] train episode 4290: winner = 1, steps = 6\n",
      "03:28:31 [INFO] train episode 4291: winner = 0, steps = 8\n",
      "03:28:33 [INFO] train episode 4292: winner = 0, steps = 8\n",
      "03:28:34 [INFO] train episode 4293: winner = 1, steps = 6\n",
      "03:28:37 [INFO] train episode 4294: winner = 1, steps = 8\n",
      "03:28:39 [INFO] train episode 4295: winner = 1, steps = 6\n",
      "03:28:40 [INFO] train episode 4296: winner = 0, steps = 8\n",
      "03:28:45 [INFO] train episode 4297: winner = 1, steps = 4\n",
      "03:28:45 [INFO] train episode 4298: winner = 1, steps = 4\n",
      "03:28:46 [INFO] train episode 4299: winner = 1, steps = 6\n",
      "03:28:51 [INFO] train episode 4300: winner = 1, steps = 6\n",
      "03:28:53 [INFO] train episode 4301: winner = 1, steps = 6\n",
      "03:29:02 [INFO] train episode 4302: winner = 1, steps = 6\n",
      "03:29:02 [INFO] train episode 4303: winner = 1, steps = 4\n",
      "03:29:06 [INFO] train episode 4304: winner = 1, steps = 6\n",
      "03:29:11 [INFO] train episode 4305: winner = 0, steps = 8\n",
      "03:29:14 [INFO] train episode 4306: winner = 1, steps = 4\n",
      "03:29:19 [INFO] train episode 4307: winner = 0, steps = 8\n",
      "03:29:24 [INFO] train episode 4308: winner = 0, steps = 8\n",
      "03:29:26 [INFO] train episode 4309: winner = 0, steps = 8\n",
      "03:29:26 [INFO] train episode 4310: winner = 1, steps = 6\n",
      "03:29:28 [INFO] train episode 4311: winner = 0, steps = 8\n",
      "03:29:32 [INFO] train episode 4312: winner = 1, steps = 8\n",
      "03:29:32 [INFO] train episode 4313: winner = 1, steps = 4\n",
      "03:29:37 [INFO] train episode 4314: winner = 1, steps = 6\n",
      "03:29:39 [INFO] train episode 4315: winner = 0, steps = 8\n",
      "03:29:39 [INFO] train episode 4316: winner = 1, steps = 6\n",
      "03:29:40 [INFO] train episode 4317: winner = -1, steps = 7\n",
      "03:29:41 [INFO] train episode 4318: winner = 1, steps = 6\n",
      "03:29:45 [INFO] train episode 4319: winner = 0, steps = 8\n",
      "03:29:45 [INFO] train episode 4320: winner = 1, steps = 4\n",
      "03:29:46 [INFO] train episode 4321: winner = 1, steps = 4\n",
      "03:29:48 [INFO] train episode 4322: winner = -1, steps = 5\n",
      "03:29:50 [INFO] train episode 4323: winner = 1, steps = 4\n",
      "03:29:51 [INFO] train episode 4324: winner = 0, steps = 8\n",
      "03:29:52 [INFO] train episode 4325: winner = 0, steps = 8\n",
      "03:29:53 [INFO] train episode 4326: winner = 1, steps = 6\n",
      "03:29:53 [INFO] train episode 4327: winner = 1, steps = 4\n",
      "03:29:55 [INFO] train episode 4328: winner = 1, steps = 4\n",
      "03:29:55 [INFO] train episode 4329: winner = 1, steps = 4\n",
      "03:29:56 [INFO] train episode 4330: winner = 0, steps = 8\n",
      "03:29:58 [INFO] train episode 4331: winner = -1, steps = 7\n",
      "03:29:58 [INFO] train episode 4332: winner = 0, steps = 8\n",
      "03:29:58 [INFO] train episode 4333: winner = 1, steps = 6\n",
      "03:29:58 [INFO] train episode 4334: winner = 1, steps = 6\n",
      "03:30:01 [INFO] train episode 4335: winner = 1, steps = 4\n",
      "03:30:04 [INFO] train episode 4336: winner = 1, steps = 8\n",
      "03:30:04 [INFO] train episode 4337: winner = 1, steps = 6\n",
      "03:30:05 [INFO] train episode 4338: winner = -1, steps = 7\n",
      "03:30:09 [INFO] train episode 4339: winner = 0, steps = 8\n",
      "03:30:11 [INFO] train episode 4340: winner = 1, steps = 8\n",
      "03:30:12 [INFO] train episode 4341: winner = 1, steps = 4\n",
      "03:30:16 [INFO] train episode 4342: winner = 0, steps = 8\n",
      "03:30:17 [INFO] train episode 4343: winner = 0, steps = 8\n",
      "03:30:20 [INFO] train episode 4344: winner = 1, steps = 4\n",
      "03:30:22 [INFO] train episode 4345: winner = 1, steps = 6\n",
      "03:30:22 [INFO] train episode 4346: winner = 1, steps = 4\n",
      "03:30:23 [INFO] train episode 4347: winner = 0, steps = 8\n",
      "03:30:26 [INFO] train episode 4348: winner = 1, steps = 6\n",
      "03:30:26 [INFO] train episode 4349: winner = 1, steps = 6\n",
      "03:30:27 [INFO] train episode 4350: winner = 1, steps = 4\n",
      "03:30:30 [INFO] train episode 4351: winner = 1, steps = 8\n",
      "03:30:33 [INFO] train episode 4352: winner = 1, steps = 6\n",
      "03:30:34 [INFO] train episode 4353: winner = 1, steps = 6\n",
      "03:30:36 [INFO] train episode 4354: winner = -1, steps = 7\n",
      "03:30:37 [INFO] train episode 4355: winner = 0, steps = 8\n",
      "03:30:37 [INFO] train episode 4356: winner = 1, steps = 4\n",
      "03:30:37 [INFO] train episode 4357: winner = 1, steps = 6\n",
      "03:30:37 [INFO] train episode 4358: winner = 1, steps = 4\n",
      "03:30:38 [INFO] train episode 4359: winner = 1, steps = 6\n",
      "03:30:40 [INFO] train episode 4360: winner = 0, steps = 8\n",
      "03:30:42 [INFO] train episode 4361: winner = 1, steps = 6\n",
      "03:30:47 [INFO] train episode 4362: winner = 0, steps = 8\n",
      "03:30:48 [INFO] train episode 4363: winner = 1, steps = 4\n",
      "03:30:51 [INFO] train episode 4364: winner = -1, steps = 7\n",
      "03:30:51 [INFO] train episode 4365: winner = 1, steps = 4\n",
      "03:30:52 [INFO] train episode 4366: winner = 0, steps = 8\n",
      "03:30:52 [INFO] train episode 4367: winner = 1, steps = 6\n",
      "03:30:54 [INFO] train episode 4368: winner = -1, steps = 7\n",
      "03:30:56 [INFO] train episode 4369: winner = 0, steps = 8\n",
      "03:31:01 [INFO] train episode 4370: winner = 0, steps = 8\n",
      "03:31:02 [INFO] train episode 4371: winner = 0, steps = 8\n",
      "03:31:02 [INFO] train episode 4372: winner = 1, steps = 4\n",
      "03:31:02 [INFO] train episode 4373: winner = 1, steps = 4\n",
      "03:31:02 [INFO] train episode 4374: winner = 1, steps = 4\n",
      "03:31:06 [INFO] train episode 4375: winner = 0, steps = 8\n",
      "03:31:06 [INFO] train episode 4376: winner = 1, steps = 4\n",
      "03:31:07 [INFO] train episode 4377: winner = 0, steps = 8\n",
      "03:31:07 [INFO] train episode 4378: winner = 1, steps = 6\n",
      "03:31:09 [INFO] train episode 4379: winner = 1, steps = 6\n",
      "03:31:11 [INFO] train episode 4380: winner = 0, steps = 8\n",
      "03:31:13 [INFO] train episode 4381: winner = 0, steps = 8\n",
      "03:31:13 [INFO] train episode 4382: winner = 0, steps = 8\n",
      "03:31:14 [INFO] train episode 4383: winner = 0, steps = 8\n",
      "03:31:15 [INFO] train episode 4384: winner = 0, steps = 8\n",
      "03:31:16 [INFO] train episode 4385: winner = 1, steps = 4\n",
      "03:31:18 [INFO] train episode 4386: winner = 0, steps = 8\n",
      "03:31:19 [INFO] train episode 4387: winner = 1, steps = 4\n",
      "03:31:21 [INFO] train episode 4388: winner = -1, steps = 7\n",
      "03:31:21 [INFO] train episode 4389: winner = 1, steps = 6\n",
      "03:31:21 [INFO] train episode 4390: winner = 1, steps = 6\n",
      "03:31:21 [INFO] train episode 4391: winner = 0, steps = 8\n",
      "03:31:23 [INFO] train episode 4392: winner = 1, steps = 8\n",
      "03:31:23 [INFO] train episode 4393: winner = 1, steps = 6\n",
      "03:31:26 [INFO] train episode 4394: winner = 1, steps = 6\n",
      "03:31:27 [INFO] train episode 4395: winner = -1, steps = 7\n",
      "03:31:28 [INFO] train episode 4396: winner = 0, steps = 8\n",
      "03:31:30 [INFO] train episode 4397: winner = 0, steps = 8\n",
      "03:31:31 [INFO] train episode 4398: winner = 1, steps = 6\n",
      "03:31:32 [INFO] train episode 4399: winner = 0, steps = 8\n",
      "03:31:33 [INFO] train episode 4400: winner = 1, steps = 8\n",
      "03:31:34 [INFO] train episode 4401: winner = 0, steps = 8\n",
      "03:31:35 [INFO] train episode 4402: winner = 1, steps = 6\n",
      "03:31:36 [INFO] train episode 4403: winner = 1, steps = 4\n",
      "03:31:37 [INFO] train episode 4404: winner = 1, steps = 4\n",
      "03:31:37 [INFO] test episode 4404:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:31:46 [INFO] step 0：player 1, action (2, 0)\n",
      "+++\n",
      "+++\n",
      "o++\n",
      "03:31:54 [INFO] step 1：player -1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "o+x\n",
      "03:31:57 [INFO] step 2：player 1, action (0, 1)\n",
      "+o+\n",
      "+++\n",
      "o+x\n",
      "03:32:01 [INFO] step 3：player -1, action (0, 2)\n",
      "+ox\n",
      "+++\n",
      "o+x\n",
      "03:32:01 [INFO] step 4：player 1, action (1, 2)\n",
      "+ox\n",
      "++o\n",
      "o+x\n",
      "03:32:02 [INFO] step 5：player -1, action (1, 1)\n",
      "+ox\n",
      "+xo\n",
      "o+x\n",
      "03:32:02 [INFO] step 6：player 1, action (0, 0)\n",
      "oox\n",
      "+xo\n",
      "o+x\n",
      "03:32:02 [INFO] step 7：player -1, action (1, 0)\n",
      "oox\n",
      "xxo\n",
      "o+x\n",
      "03:32:02 [INFO] step 8：player 1, action (2, 1)\n",
      "oox\n",
      "xxo\n",
      "oox\n",
      "03:32:02 [INFO] test episode 4404: winner = 0, steps = 8\n",
      "03:32:15 [INFO] train episode 4405: winner = 0, steps = 8\n",
      "03:32:27 [INFO] train episode 4406: winner = 0, steps = 8\n",
      "03:32:38 [INFO] train episode 4407: winner = -1, steps = 7\n",
      "03:32:40 [INFO] train episode 4408: winner = -1, steps = 5\n",
      "03:33:00 [INFO] train episode 4409: winner = 0, steps = 8\n",
      "03:33:09 [INFO] train episode 4410: winner = 1, steps = 6\n",
      "03:33:21 [INFO] train episode 4411: winner = 1, steps = 4\n",
      "03:33:29 [INFO] train episode 4412: winner = 1, steps = 4\n",
      "03:33:32 [INFO] train episode 4413: winner = 1, steps = 6\n",
      "03:33:36 [INFO] train episode 4414: winner = 0, steps = 8\n",
      "03:33:38 [INFO] train episode 4415: winner = 1, steps = 4\n",
      "03:33:47 [INFO] train episode 4416: winner = 1, steps = 6\n",
      "03:33:48 [INFO] train episode 4417: winner = 1, steps = 6\n",
      "03:33:50 [INFO] train episode 4418: winner = 1, steps = 6\n",
      "03:33:52 [INFO] train episode 4419: winner = 1, steps = 6\n",
      "03:33:57 [INFO] train episode 4420: winner = 1, steps = 6\n",
      "03:33:57 [INFO] train episode 4421: winner = 1, steps = 4\n",
      "03:34:05 [INFO] train episode 4422: winner = -1, steps = 7\n",
      "03:34:15 [INFO] train episode 4423: winner = 1, steps = 6\n",
      "03:34:18 [INFO] train episode 4424: winner = 0, steps = 8\n",
      "03:34:28 [INFO] train episode 4425: winner = 0, steps = 8\n",
      "03:34:30 [INFO] train episode 4426: winner = 1, steps = 4\n",
      "03:34:32 [INFO] train episode 4427: winner = 1, steps = 4\n",
      "03:34:36 [INFO] train episode 4428: winner = 0, steps = 8\n",
      "03:34:37 [INFO] train episode 4429: winner = 0, steps = 8\n",
      "03:34:40 [INFO] train episode 4430: winner = 0, steps = 8\n",
      "03:34:46 [INFO] train episode 4431: winner = -1, steps = 7\n",
      "03:34:47 [INFO] train episode 4432: winner = 1, steps = 4\n",
      "03:34:48 [INFO] train episode 4433: winner = 1, steps = 6\n",
      "03:34:48 [INFO] train episode 4434: winner = 1, steps = 6\n",
      "03:34:50 [INFO] train episode 4435: winner = 0, steps = 8\n",
      "03:34:55 [INFO] train episode 4436: winner = 0, steps = 8\n",
      "03:34:58 [INFO] train episode 4437: winner = 1, steps = 6\n",
      "03:35:05 [INFO] train episode 4438: winner = 0, steps = 8\n",
      "03:35:05 [INFO] train episode 4439: winner = 1, steps = 4\n",
      "03:35:10 [INFO] train episode 4440: winner = 1, steps = 8\n",
      "03:35:15 [INFO] train episode 4441: winner = 0, steps = 8\n",
      "03:35:18 [INFO] train episode 4442: winner = 0, steps = 8\n",
      "03:35:25 [INFO] train episode 4443: winner = 0, steps = 8\n",
      "03:35:26 [INFO] train episode 4444: winner = -1, steps = 7\n",
      "03:35:27 [INFO] train episode 4445: winner = -1, steps = 7\n",
      "03:35:28 [INFO] train episode 4446: winner = 1, steps = 4\n",
      "03:35:29 [INFO] train episode 4447: winner = 0, steps = 8\n",
      "03:35:30 [INFO] train episode 4448: winner = -1, steps = 7\n",
      "03:35:34 [INFO] train episode 4449: winner = 0, steps = 8\n",
      "03:35:37 [INFO] train episode 4450: winner = 0, steps = 8\n",
      "03:35:37 [INFO] train episode 4451: winner = 0, steps = 8\n",
      "03:35:39 [INFO] train episode 4452: winner = 1, steps = 4\n",
      "03:35:41 [INFO] train episode 4453: winner = 1, steps = 4\n",
      "03:35:43 [INFO] train episode 4454: winner = 1, steps = 6\n",
      "03:35:49 [INFO] train episode 4455: winner = 0, steps = 8\n",
      "03:35:49 [INFO] train episode 4456: winner = 0, steps = 8\n",
      "03:35:52 [INFO] train episode 4457: winner = 1, steps = 4\n",
      "03:35:53 [INFO] train episode 4458: winner = -1, steps = 7\n",
      "03:35:58 [INFO] train episode 4459: winner = 0, steps = 8\n",
      "03:35:58 [INFO] train episode 4460: winner = 1, steps = 4\n",
      "03:35:58 [INFO] train episode 4461: winner = 0, steps = 8\n",
      "03:35:59 [INFO] train episode 4462: winner = 1, steps = 6\n",
      "03:36:02 [INFO] train episode 4463: winner = 1, steps = 8\n",
      "03:36:04 [INFO] train episode 4464: winner = 1, steps = 6\n",
      "03:36:06 [INFO] train episode 4465: winner = 1, steps = 6\n",
      "03:36:08 [INFO] train episode 4466: winner = 1, steps = 4\n",
      "03:36:09 [INFO] train episode 4467: winner = 1, steps = 6\n",
      "03:36:11 [INFO] train episode 4468: winner = -1, steps = 5\n",
      "03:36:12 [INFO] train episode 4469: winner = 0, steps = 8\n",
      "03:36:13 [INFO] train episode 4470: winner = 1, steps = 8\n",
      "03:36:14 [INFO] train episode 4471: winner = 1, steps = 4\n",
      "03:36:14 [INFO] train episode 4472: winner = 1, steps = 4\n",
      "03:36:14 [INFO] train episode 4473: winner = 1, steps = 4\n",
      "03:36:15 [INFO] train episode 4474: winner = 1, steps = 6\n",
      "03:36:15 [INFO] train episode 4475: winner = 1, steps = 4\n",
      "03:36:16 [INFO] train episode 4476: winner = 1, steps = 6\n",
      "03:36:17 [INFO] train episode 4477: winner = 1, steps = 4\n",
      "03:36:20 [INFO] train episode 4478: winner = 0, steps = 8\n",
      "03:36:24 [INFO] train episode 4479: winner = 0, steps = 8\n",
      "03:36:27 [INFO] train episode 4480: winner = -1, steps = 5\n",
      "03:36:27 [INFO] train episode 4481: winner = 1, steps = 4\n",
      "03:36:28 [INFO] train episode 4482: winner = 1, steps = 4\n",
      "03:36:28 [INFO] train episode 4483: winner = 1, steps = 6\n",
      "03:36:29 [INFO] train episode 4484: winner = 1, steps = 6\n",
      "03:36:30 [INFO] train episode 4485: winner = 1, steps = 4\n",
      "03:36:37 [INFO] train episode 4486: winner = 0, steps = 8\n",
      "03:36:42 [INFO] train episode 4487: winner = 0, steps = 8\n",
      "03:36:43 [INFO] train episode 4488: winner = 1, steps = 6\n",
      "03:36:43 [INFO] train episode 4489: winner = 1, steps = 4\n",
      "03:36:45 [INFO] train episode 4490: winner = 1, steps = 6\n",
      "03:36:50 [INFO] train episode 4491: winner = 0, steps = 8\n",
      "03:36:50 [INFO] train episode 4492: winner = -1, steps = 7\n",
      "03:36:50 [INFO] train episode 4493: winner = 0, steps = 8\n",
      "03:36:52 [INFO] train episode 4494: winner = 0, steps = 8\n",
      "03:36:52 [INFO] train episode 4495: winner = 1, steps = 6\n",
      "03:36:54 [INFO] train episode 4496: winner = 1, steps = 6\n",
      "03:36:55 [INFO] train episode 4497: winner = 1, steps = 4\n",
      "03:36:56 [INFO] train episode 4498: winner = 1, steps = 6\n",
      "03:36:56 [INFO] train episode 4499: winner = 1, steps = 6\n",
      "03:36:58 [INFO] train episode 4500: winner = 1, steps = 4\n",
      "03:37:01 [INFO] train episode 4501: winner = 0, steps = 8\n",
      "03:37:03 [INFO] train episode 4502: winner = 1, steps = 4\n",
      "03:37:04 [INFO] train episode 4503: winner = 0, steps = 8\n",
      "03:37:05 [INFO] train episode 4504: winner = 1, steps = 6\n",
      "03:37:06 [INFO] train episode 4505: winner = 1, steps = 6\n",
      "03:37:07 [INFO] train episode 4506: winner = -1, steps = 5\n",
      "03:37:07 [INFO] train episode 4507: winner = 0, steps = 8\n",
      "03:37:08 [INFO] train episode 4508: winner = 1, steps = 4\n",
      "03:37:08 [INFO] train episode 4509: winner = 1, steps = 6\n",
      "03:37:09 [INFO] train episode 4510: winner = 1, steps = 6\n",
      "03:37:09 [INFO] train episode 4511: winner = 1, steps = 6\n",
      "03:37:10 [INFO] train episode 4512: winner = 1, steps = 6\n",
      "03:37:11 [INFO] train episode 4513: winner = 0, steps = 8\n",
      "03:37:13 [INFO] train episode 4514: winner = 1, steps = 6\n",
      "03:37:15 [INFO] train episode 4515: winner = 1, steps = 6\n",
      "03:37:17 [INFO] train episode 4516: winner = 0, steps = 8\n",
      "03:37:17 [INFO] train episode 4517: winner = -1, steps = 7\n",
      "03:37:17 [INFO] train episode 4518: winner = 1, steps = 4\n",
      "03:37:20 [INFO] train episode 4519: winner = 0, steps = 8\n",
      "03:37:21 [INFO] train episode 4520: winner = 1, steps = 6\n",
      "03:37:22 [INFO] train episode 4521: winner = 1, steps = 4\n",
      "03:37:24 [INFO] train episode 4522: winner = 0, steps = 8\n",
      "03:37:26 [INFO] train episode 4523: winner = 0, steps = 8\n",
      "03:37:26 [INFO] train episode 4524: winner = -1, steps = 7\n",
      "03:37:28 [INFO] train episode 4525: winner = 0, steps = 8\n",
      "03:37:30 [INFO] train episode 4526: winner = 0, steps = 8\n",
      "03:37:30 [INFO] train episode 4527: winner = 0, steps = 8\n",
      "03:37:30 [INFO] train episode 4528: winner = 1, steps = 4\n",
      "03:37:31 [INFO] train episode 4529: winner = 1, steps = 6\n",
      "03:37:32 [INFO] train episode 4530: winner = 1, steps = 4\n",
      "03:37:32 [INFO] train episode 4531: winner = 1, steps = 6\n",
      "03:37:33 [INFO] train episode 4532: winner = 1, steps = 4\n",
      "03:37:33 [INFO] train episode 4533: winner = 1, steps = 4\n",
      "03:37:35 [INFO] train episode 4534: winner = -1, steps = 5\n",
      "03:37:36 [INFO] train episode 4535: winner = 1, steps = 6\n",
      "03:37:40 [INFO] train episode 4536: winner = 0, steps = 8\n",
      "03:37:41 [INFO] train episode 4537: winner = 0, steps = 8\n",
      "03:37:41 [INFO] train episode 4538: winner = 1, steps = 8\n",
      "03:37:42 [INFO] train episode 4539: winner = 1, steps = 6\n",
      "03:37:42 [INFO] train episode 4540: winner = 1, steps = 6\n",
      "03:37:42 [INFO] train episode 4541: winner = 1, steps = 6\n",
      "03:37:43 [INFO] train episode 4542: winner = 0, steps = 8\n",
      "03:37:43 [INFO] test episode 4542:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:37:52 [INFO] step 0：player 1, action (0, 0)\n",
      "o++\n",
      "+++\n",
      "+++\n",
      "03:37:58 [INFO] step 1：player -1, action (1, 1)\n",
      "o++\n",
      "+x+\n",
      "+++\n",
      "03:38:04 [INFO] step 2：player 1, action (2, 0)\n",
      "o++\n",
      "+x+\n",
      "o++\n",
      "03:38:06 [INFO] step 3：player -1, action (2, 1)\n",
      "o++\n",
      "+x+\n",
      "ox+\n",
      "03:38:07 [INFO] step 4：player 1, action (1, 2)\n",
      "o++\n",
      "+xo\n",
      "ox+\n",
      "03:38:07 [INFO] step 5：player -1, action (0, 1)\n",
      "ox+\n",
      "+xo\n",
      "ox+\n",
      "03:38:07 [INFO] test episode 4542: winner = -1, steps = 5\n",
      "03:38:19 [INFO] train episode 4543: winner = 1, steps = 6\n",
      "03:38:31 [INFO] train episode 4544: winner = 0, steps = 8\n",
      "03:38:45 [INFO] train episode 4545: winner = 0, steps = 8\n",
      "03:38:51 [INFO] train episode 4546: winner = 0, steps = 8\n",
      "03:39:06 [INFO] train episode 4547: winner = 0, steps = 8\n",
      "03:39:15 [INFO] train episode 4548: winner = -1, steps = 7\n",
      "03:39:29 [INFO] train episode 4549: winner = -1, steps = 7\n",
      "03:39:36 [INFO] train episode 4550: winner = 1, steps = 4\n",
      "03:39:43 [INFO] train episode 4551: winner = 1, steps = 6\n",
      "03:39:50 [INFO] train episode 4552: winner = 0, steps = 8\n",
      "03:39:53 [INFO] train episode 4553: winner = 1, steps = 4\n",
      "03:39:56 [INFO] train episode 4554: winner = 1, steps = 4\n",
      "03:39:59 [INFO] train episode 4555: winner = 1, steps = 4\n",
      "03:40:02 [INFO] train episode 4556: winner = 1, steps = 6\n",
      "03:40:07 [INFO] train episode 4557: winner = -1, steps = 7\n",
      "03:40:08 [INFO] train episode 4558: winner = 1, steps = 4\n",
      "03:40:10 [INFO] train episode 4559: winner = 1, steps = 4\n",
      "03:40:21 [INFO] train episode 4560: winner = 0, steps = 8\n",
      "03:40:22 [INFO] train episode 4561: winner = 1, steps = 4\n",
      "03:40:27 [INFO] train episode 4562: winner = -1, steps = 7\n",
      "03:40:30 [INFO] train episode 4563: winner = 0, steps = 8\n",
      "03:40:31 [INFO] train episode 4564: winner = 1, steps = 6\n",
      "03:40:33 [INFO] train episode 4565: winner = 1, steps = 6\n",
      "03:40:39 [INFO] train episode 4566: winner = 0, steps = 8\n",
      "03:40:43 [INFO] train episode 4567: winner = 1, steps = 4\n",
      "03:40:48 [INFO] train episode 4568: winner = 1, steps = 4\n",
      "03:40:55 [INFO] train episode 4569: winner = 0, steps = 8\n",
      "03:40:55 [INFO] train episode 4570: winner = 1, steps = 6\n",
      "03:41:02 [INFO] train episode 4571: winner = 0, steps = 8\n",
      "03:41:05 [INFO] train episode 4572: winner = 1, steps = 4\n",
      "03:41:06 [INFO] train episode 4573: winner = 0, steps = 8\n",
      "03:41:09 [INFO] train episode 4574: winner = 1, steps = 6\n",
      "03:41:18 [INFO] train episode 4575: winner = 0, steps = 8\n",
      "03:41:19 [INFO] train episode 4576: winner = 1, steps = 6\n",
      "03:41:20 [INFO] train episode 4577: winner = 1, steps = 6\n",
      "03:41:21 [INFO] train episode 4578: winner = 1, steps = 6\n",
      "03:41:25 [INFO] train episode 4579: winner = 1, steps = 4\n",
      "03:41:29 [INFO] train episode 4580: winner = 0, steps = 8\n",
      "03:41:29 [INFO] train episode 4581: winner = 1, steps = 4\n",
      "03:41:31 [INFO] train episode 4582: winner = 1, steps = 6\n",
      "03:41:33 [INFO] train episode 4583: winner = 1, steps = 6\n",
      "03:41:35 [INFO] train episode 4584: winner = 1, steps = 6\n",
      "03:41:37 [INFO] train episode 4585: winner = 1, steps = 4\n",
      "03:41:39 [INFO] train episode 4586: winner = 0, steps = 8\n",
      "03:41:39 [INFO] train episode 4587: winner = 1, steps = 6\n",
      "03:41:41 [INFO] train episode 4588: winner = 1, steps = 6\n",
      "03:41:41 [INFO] train episode 4589: winner = 1, steps = 6\n",
      "03:41:45 [INFO] train episode 4590: winner = 0, steps = 8\n",
      "03:41:50 [INFO] train episode 4591: winner = 0, steps = 8\n",
      "03:41:51 [INFO] train episode 4592: winner = 1, steps = 6\n",
      "03:41:53 [INFO] train episode 4593: winner = 1, steps = 4\n",
      "03:41:56 [INFO] train episode 4594: winner = 0, steps = 8\n",
      "03:41:57 [INFO] train episode 4595: winner = 1, steps = 6\n",
      "03:41:58 [INFO] train episode 4596: winner = -1, steps = 5\n",
      "03:41:59 [INFO] train episode 4597: winner = 1, steps = 6\n",
      "03:42:02 [INFO] train episode 4598: winner = 0, steps = 8\n",
      "03:42:02 [INFO] train episode 4599: winner = 1, steps = 6\n",
      "03:42:04 [INFO] train episode 4600: winner = 1, steps = 4\n",
      "03:42:06 [INFO] train episode 4601: winner = 1, steps = 6\n",
      "03:42:07 [INFO] train episode 4602: winner = 0, steps = 8\n",
      "03:42:09 [INFO] train episode 4603: winner = 1, steps = 6\n",
      "03:42:11 [INFO] train episode 4604: winner = 0, steps = 8\n",
      "03:42:12 [INFO] train episode 4605: winner = -1, steps = 7\n",
      "03:42:13 [INFO] train episode 4606: winner = 1, steps = 6\n",
      "03:42:16 [INFO] train episode 4607: winner = 1, steps = 6\n",
      "03:42:16 [INFO] train episode 4608: winner = 1, steps = 6\n",
      "03:42:16 [INFO] train episode 4609: winner = 1, steps = 4\n",
      "03:42:18 [INFO] train episode 4610: winner = 1, steps = 6\n",
      "03:42:20 [INFO] train episode 4611: winner = 0, steps = 8\n",
      "03:42:23 [INFO] train episode 4612: winner = 0, steps = 8\n",
      "03:42:24 [INFO] train episode 4613: winner = -1, steps = 7\n",
      "03:42:26 [INFO] train episode 4614: winner = 1, steps = 4\n",
      "03:42:27 [INFO] train episode 4615: winner = 0, steps = 8\n",
      "03:42:29 [INFO] train episode 4616: winner = -1, steps = 5\n",
      "03:42:30 [INFO] train episode 4617: winner = 1, steps = 6\n",
      "03:42:31 [INFO] train episode 4618: winner = 1, steps = 4\n",
      "03:42:31 [INFO] train episode 4619: winner = 1, steps = 4\n",
      "03:42:34 [INFO] train episode 4620: winner = -1, steps = 7\n",
      "03:42:34 [INFO] train episode 4621: winner = 1, steps = 4\n",
      "03:42:34 [INFO] train episode 4622: winner = 0, steps = 8\n",
      "03:42:35 [INFO] train episode 4623: winner = 1, steps = 6\n",
      "03:42:40 [INFO] train episode 4624: winner = 0, steps = 8\n",
      "03:42:40 [INFO] train episode 4625: winner = 0, steps = 8\n",
      "03:42:42 [INFO] train episode 4626: winner = 1, steps = 6\n",
      "03:42:44 [INFO] train episode 4627: winner = 1, steps = 8\n",
      "03:42:44 [INFO] train episode 4628: winner = 1, steps = 4\n",
      "03:42:46 [INFO] train episode 4629: winner = 1, steps = 4\n",
      "03:42:46 [INFO] train episode 4630: winner = -1, steps = 5\n",
      "03:42:46 [INFO] train episode 4631: winner = 1, steps = 4\n",
      "03:42:47 [INFO] train episode 4632: winner = 0, steps = 8\n",
      "03:42:49 [INFO] train episode 4633: winner = -1, steps = 7\n",
      "03:42:49 [INFO] train episode 4634: winner = 1, steps = 6\n",
      "03:42:49 [INFO] train episode 4635: winner = 0, steps = 8\n",
      "03:42:52 [INFO] train episode 4636: winner = 1, steps = 6\n",
      "03:42:52 [INFO] train episode 4637: winner = 1, steps = 4\n",
      "03:42:53 [INFO] train episode 4638: winner = 1, steps = 8\n",
      "03:42:58 [INFO] train episode 4639: winner = 0, steps = 8\n",
      "03:42:58 [INFO] train episode 4640: winner = 1, steps = 6\n",
      "03:43:02 [INFO] train episode 4641: winner = 0, steps = 8\n",
      "03:43:02 [INFO] train episode 4642: winner = 1, steps = 4\n",
      "03:43:03 [INFO] train episode 4643: winner = 0, steps = 8\n",
      "03:43:06 [INFO] train episode 4644: winner = 1, steps = 6\n",
      "03:43:08 [INFO] train episode 4645: winner = -1, steps = 7\n",
      "03:43:08 [INFO] train episode 4646: winner = 0, steps = 8\n",
      "03:43:08 [INFO] train episode 4647: winner = 1, steps = 4\n",
      "03:43:08 [INFO] train episode 4648: winner = 1, steps = 6\n",
      "03:43:08 [INFO] train episode 4649: winner = 0, steps = 8\n",
      "03:43:10 [INFO] train episode 4650: winner = 0, steps = 8\n",
      "03:43:11 [INFO] train episode 4651: winner = 1, steps = 8\n",
      "03:43:11 [INFO] train episode 4652: winner = 0, steps = 8\n",
      "03:43:13 [INFO] train episode 4653: winner = -1, steps = 7\n",
      "03:43:14 [INFO] train episode 4654: winner = 0, steps = 8\n",
      "03:43:14 [INFO] train episode 4655: winner = -1, steps = 7\n",
      "03:43:20 [INFO] train episode 4656: winner = 1, steps = 6\n",
      "03:43:20 [INFO] train episode 4657: winner = 1, steps = 6\n",
      "03:43:20 [INFO] train episode 4658: winner = 0, steps = 8\n",
      "03:43:22 [INFO] train episode 4659: winner = 1, steps = 8\n",
      "03:43:25 [INFO] train episode 4660: winner = 0, steps = 8\n",
      "03:43:26 [INFO] train episode 4661: winner = 1, steps = 6\n",
      "03:43:26 [INFO] train episode 4662: winner = 1, steps = 4\n",
      "03:43:29 [INFO] train episode 4663: winner = 0, steps = 8\n",
      "03:43:31 [INFO] train episode 4664: winner = 1, steps = 6\n",
      "03:43:31 [INFO] train episode 4665: winner = 1, steps = 6\n",
      "03:43:34 [INFO] train episode 4666: winner = 0, steps = 8\n",
      "03:43:34 [INFO] train episode 4667: winner = 1, steps = 4\n",
      "03:43:34 [INFO] train episode 4668: winner = 1, steps = 4\n",
      "03:43:34 [INFO] train episode 4669: winner = 0, steps = 8\n",
      "03:43:34 [INFO] train episode 4670: winner = 0, steps = 8\n",
      "03:43:34 [INFO] train episode 4671: winner = 0, steps = 8\n",
      "03:43:34 [INFO] train episode 4672: winner = 1, steps = 4\n",
      "03:43:37 [INFO] train episode 4673: winner = 0, steps = 8\n",
      "03:43:37 [INFO] train episode 4674: winner = 0, steps = 8\n",
      "03:43:38 [INFO] train episode 4675: winner = 0, steps = 8\n",
      "03:43:39 [INFO] train episode 4676: winner = 1, steps = 6\n",
      "03:43:39 [INFO] train episode 4677: winner = 1, steps = 8\n",
      "03:43:42 [INFO] train episode 4678: winner = 1, steps = 4\n",
      "03:43:42 [INFO] test episode 4678:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:43:51 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "03:43:59 [INFO] step 1：player -1, action (0, 0)\n",
      "x++\n",
      "++o\n",
      "+++\n",
      "03:44:02 [INFO] step 2：player 1, action (2, 1)\n",
      "x++\n",
      "++o\n",
      "+o+\n",
      "03:44:05 [INFO] step 3：player -1, action (0, 2)\n",
      "x+x\n",
      "++o\n",
      "+o+\n",
      "03:44:05 [INFO] step 4：player 1, action (2, 0)\n",
      "x+x\n",
      "++o\n",
      "oo+\n",
      "03:44:06 [INFO] step 5：player -1, action (0, 1)\n",
      "xxx\n",
      "++o\n",
      "oo+\n",
      "03:44:06 [INFO] test episode 4678: winner = -1, steps = 5\n",
      "03:44:19 [INFO] train episode 4679: winner = 1, steps = 4\n",
      "03:44:30 [INFO] train episode 4680: winner = 1, steps = 4\n",
      "03:44:46 [INFO] train episode 4681: winner = 0, steps = 8\n",
      "03:44:54 [INFO] train episode 4682: winner = 1, steps = 6\n",
      "03:44:57 [INFO] train episode 4683: winner = 1, steps = 6\n",
      "03:45:07 [INFO] train episode 4684: winner = 1, steps = 4\n",
      "03:45:12 [INFO] train episode 4685: winner = 0, steps = 8\n",
      "03:45:26 [INFO] train episode 4686: winner = 1, steps = 6\n",
      "03:45:28 [INFO] train episode 4687: winner = 1, steps = 6\n",
      "03:45:39 [INFO] train episode 4688: winner = 0, steps = 8\n",
      "03:45:45 [INFO] train episode 4689: winner = 0, steps = 8\n",
      "03:45:47 [INFO] train episode 4690: winner = 1, steps = 4\n",
      "03:45:49 [INFO] train episode 4691: winner = 1, steps = 4\n",
      "03:45:51 [INFO] train episode 4692: winner = 1, steps = 6\n",
      "03:45:56 [INFO] train episode 4693: winner = 0, steps = 8\n",
      "03:45:59 [INFO] train episode 4694: winner = 0, steps = 8\n",
      "03:46:01 [INFO] train episode 4695: winner = 1, steps = 6\n",
      "03:46:04 [INFO] train episode 4696: winner = 1, steps = 6\n",
      "03:46:06 [INFO] train episode 4697: winner = 1, steps = 6\n",
      "03:46:10 [INFO] train episode 4698: winner = -1, steps = 5\n",
      "03:46:12 [INFO] train episode 4699: winner = 1, steps = 4\n",
      "03:46:17 [INFO] train episode 4700: winner = 1, steps = 4\n",
      "03:46:20 [INFO] train episode 4701: winner = -1, steps = 7\n",
      "03:46:26 [INFO] train episode 4702: winner = 0, steps = 8\n",
      "03:46:31 [INFO] train episode 4703: winner = 0, steps = 8\n",
      "03:46:31 [INFO] train episode 4704: winner = 1, steps = 4\n",
      "03:46:32 [INFO] train episode 4705: winner = 1, steps = 6\n",
      "03:46:33 [INFO] train episode 4706: winner = 1, steps = 6\n",
      "03:46:36 [INFO] train episode 4707: winner = 1, steps = 4\n",
      "03:46:36 [INFO] train episode 4708: winner = 1, steps = 4\n",
      "03:46:38 [INFO] train episode 4709: winner = 1, steps = 4\n",
      "03:46:41 [INFO] train episode 4710: winner = -1, steps = 5\n",
      "03:46:43 [INFO] train episode 4711: winner = 1, steps = 4\n",
      "03:46:46 [INFO] train episode 4712: winner = 1, steps = 4\n",
      "03:46:47 [INFO] train episode 4713: winner = 1, steps = 6\n",
      "03:46:49 [INFO] train episode 4714: winner = 1, steps = 6\n",
      "03:46:53 [INFO] train episode 4715: winner = -1, steps = 7\n",
      "03:46:57 [INFO] train episode 4716: winner = 1, steps = 4\n",
      "03:47:00 [INFO] train episode 4717: winner = 0, steps = 8\n",
      "03:47:03 [INFO] train episode 4718: winner = -1, steps = 5\n",
      "03:47:09 [INFO] train episode 4719: winner = -1, steps = 5\n",
      "03:47:10 [INFO] train episode 4720: winner = 0, steps = 8\n",
      "03:47:20 [INFO] train episode 4721: winner = 0, steps = 8\n",
      "03:47:21 [INFO] train episode 4722: winner = 1, steps = 4\n",
      "03:47:24 [INFO] train episode 4723: winner = 1, steps = 6\n",
      "03:47:26 [INFO] train episode 4724: winner = 1, steps = 6\n",
      "03:47:30 [INFO] train episode 4725: winner = 0, steps = 8\n",
      "03:47:32 [INFO] train episode 4726: winner = -1, steps = 7\n",
      "03:47:38 [INFO] train episode 4727: winner = 0, steps = 8\n",
      "03:47:41 [INFO] train episode 4728: winner = 0, steps = 8\n",
      "03:47:42 [INFO] train episode 4729: winner = 1, steps = 4\n",
      "03:47:42 [INFO] train episode 4730: winner = 1, steps = 6\n",
      "03:47:47 [INFO] train episode 4731: winner = 0, steps = 8\n",
      "03:47:50 [INFO] train episode 4732: winner = 0, steps = 8\n",
      "03:47:50 [INFO] train episode 4733: winner = 0, steps = 8\n",
      "03:47:50 [INFO] train episode 4734: winner = 1, steps = 6\n",
      "03:47:51 [INFO] train episode 4735: winner = 1, steps = 4\n",
      "03:47:53 [INFO] train episode 4736: winner = 1, steps = 6\n",
      "03:47:55 [INFO] train episode 4737: winner = 0, steps = 8\n",
      "03:47:59 [INFO] train episode 4738: winner = 0, steps = 8\n",
      "03:47:59 [INFO] train episode 4739: winner = 1, steps = 4\n",
      "03:48:00 [INFO] train episode 4740: winner = 1, steps = 6\n",
      "03:48:01 [INFO] train episode 4741: winner = 1, steps = 8\n",
      "03:48:01 [INFO] train episode 4742: winner = 1, steps = 6\n",
      "03:48:04 [INFO] train episode 4743: winner = 0, steps = 8\n",
      "03:48:04 [INFO] train episode 4744: winner = 1, steps = 6\n",
      "03:48:08 [INFO] train episode 4745: winner = 0, steps = 8\n",
      "03:48:11 [INFO] train episode 4746: winner = 1, steps = 8\n",
      "03:48:17 [INFO] train episode 4747: winner = 1, steps = 8\n",
      "03:48:18 [INFO] train episode 4748: winner = 1, steps = 4\n",
      "03:48:21 [INFO] train episode 4749: winner = 0, steps = 8\n",
      "03:48:21 [INFO] train episode 4750: winner = 0, steps = 8\n",
      "03:48:22 [INFO] train episode 4751: winner = 1, steps = 6\n",
      "03:48:25 [INFO] train episode 4752: winner = 0, steps = 8\n",
      "03:48:25 [INFO] train episode 4753: winner = 1, steps = 4\n",
      "03:48:25 [INFO] train episode 4754: winner = 1, steps = 4\n",
      "03:48:27 [INFO] train episode 4755: winner = 0, steps = 8\n",
      "03:48:32 [INFO] train episode 4756: winner = 0, steps = 8\n",
      "03:48:33 [INFO] train episode 4757: winner = 1, steps = 6\n",
      "03:48:34 [INFO] train episode 4758: winner = -1, steps = 7\n",
      "03:48:34 [INFO] train episode 4759: winner = 1, steps = 4\n",
      "03:48:34 [INFO] train episode 4760: winner = 1, steps = 4\n",
      "03:48:34 [INFO] train episode 4761: winner = 0, steps = 8\n",
      "03:48:40 [INFO] train episode 4762: winner = 0, steps = 8\n",
      "03:48:42 [INFO] train episode 4763: winner = 1, steps = 6\n",
      "03:48:44 [INFO] train episode 4764: winner = 0, steps = 8\n",
      "03:48:45 [INFO] train episode 4765: winner = 0, steps = 8\n",
      "03:48:46 [INFO] train episode 4766: winner = 0, steps = 8\n",
      "03:48:48 [INFO] train episode 4767: winner = 1, steps = 6\n",
      "03:48:49 [INFO] train episode 4768: winner = 0, steps = 8\n",
      "03:48:50 [INFO] train episode 4769: winner = 1, steps = 4\n",
      "03:48:51 [INFO] train episode 4770: winner = 1, steps = 6\n",
      "03:48:52 [INFO] train episode 4771: winner = 1, steps = 6\n",
      "03:48:54 [INFO] train episode 4772: winner = 1, steps = 4\n",
      "03:48:55 [INFO] train episode 4773: winner = 1, steps = 6\n",
      "03:48:58 [INFO] train episode 4774: winner = 0, steps = 8\n",
      "03:49:00 [INFO] train episode 4775: winner = 0, steps = 8\n",
      "03:49:02 [INFO] train episode 4776: winner = 0, steps = 8\n",
      "03:49:02 [INFO] train episode 4777: winner = 1, steps = 4\n",
      "03:49:02 [INFO] train episode 4778: winner = 1, steps = 4\n",
      "03:49:04 [INFO] train episode 4779: winner = 0, steps = 8\n",
      "03:49:04 [INFO] train episode 4780: winner = 1, steps = 4\n",
      "03:49:05 [INFO] train episode 4781: winner = 1, steps = 4\n",
      "03:49:07 [INFO] train episode 4782: winner = 1, steps = 8\n",
      "03:49:09 [INFO] train episode 4783: winner = 0, steps = 8\n",
      "03:49:11 [INFO] train episode 4784: winner = 0, steps = 8\n",
      "03:49:12 [INFO] train episode 4785: winner = 0, steps = 8\n",
      "03:49:13 [INFO] train episode 4786: winner = 1, steps = 6\n",
      "03:49:15 [INFO] train episode 4787: winner = 0, steps = 8\n",
      "03:49:15 [INFO] train episode 4788: winner = 0, steps = 8\n",
      "03:49:17 [INFO] train episode 4789: winner = 1, steps = 6\n",
      "03:49:17 [INFO] train episode 4790: winner = 0, steps = 8\n",
      "03:49:18 [INFO] train episode 4791: winner = 1, steps = 4\n",
      "03:49:20 [INFO] train episode 4792: winner = 1, steps = 6\n",
      "03:49:23 [INFO] train episode 4793: winner = 1, steps = 6\n",
      "03:49:26 [INFO] train episode 4794: winner = 0, steps = 8\n",
      "03:49:27 [INFO] train episode 4795: winner = 0, steps = 8\n",
      "03:49:28 [INFO] train episode 4796: winner = 1, steps = 4\n",
      "03:49:29 [INFO] train episode 4797: winner = -1, steps = 7\n",
      "03:49:30 [INFO] train episode 4798: winner = 0, steps = 8\n",
      "03:49:32 [INFO] train episode 4799: winner = 0, steps = 8\n",
      "03:49:32 [INFO] train episode 4800: winner = 1, steps = 6\n",
      "03:49:33 [INFO] train episode 4801: winner = 0, steps = 8\n",
      "03:49:33 [INFO] train episode 4802: winner = 1, steps = 4\n",
      "03:49:34 [INFO] train episode 4803: winner = 1, steps = 4\n",
      "03:49:34 [INFO] train episode 4804: winner = 0, steps = 8\n",
      "03:49:35 [INFO] train episode 4805: winner = -1, steps = 5\n",
      "03:49:37 [INFO] train episode 4806: winner = 1, steps = 4\n",
      "03:49:38 [INFO] train episode 4807: winner = 1, steps = 6\n",
      "03:49:40 [INFO] train episode 4808: winner = 0, steps = 8\n",
      "03:49:40 [INFO] train episode 4809: winner = 1, steps = 6\n",
      "03:49:41 [INFO] train episode 4810: winner = -1, steps = 5\n",
      "03:49:46 [INFO] train episode 4811: winner = 0, steps = 8\n",
      "03:49:46 [INFO] train episode 4812: winner = 0, steps = 8\n",
      "03:49:49 [INFO] train episode 4813: winner = -1, steps = 7\n",
      "03:49:50 [INFO] train episode 4814: winner = 1, steps = 4\n",
      "03:49:51 [INFO] train episode 4815: winner = 0, steps = 8\n",
      "03:49:51 [INFO] test episode 4815:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:50:00 [INFO] step 0：player 1, action (0, 2)\n",
      "++o\n",
      "+++\n",
      "+++\n",
      "03:50:08 [INFO] step 1：player -1, action (2, 2)\n",
      "++o\n",
      "+++\n",
      "++x\n",
      "03:50:10 [INFO] step 2：player 1, action (0, 0)\n",
      "o+o\n",
      "+++\n",
      "++x\n",
      "03:50:10 [INFO] step 3：player -1, action (2, 0)\n",
      "o+o\n",
      "+++\n",
      "x+x\n",
      "03:50:11 [INFO] step 4：player 1, action (0, 1)\n",
      "ooo\n",
      "+++\n",
      "x+x\n",
      "03:50:11 [INFO] test episode 4815: winner = 1, steps = 4\n",
      "03:50:29 [INFO] train episode 4816: winner = 1, steps = 6\n",
      "03:50:47 [INFO] train episode 4817: winner = 0, steps = 8\n",
      "03:50:48 [INFO] train episode 4818: winner = 1, steps = 4\n",
      "03:50:56 [INFO] train episode 4819: winner = 0, steps = 8\n",
      "03:51:04 [INFO] train episode 4820: winner = 0, steps = 8\n",
      "03:51:12 [INFO] train episode 4821: winner = 1, steps = 6\n",
      "03:51:19 [INFO] train episode 4822: winner = 1, steps = 6\n",
      "03:51:24 [INFO] train episode 4823: winner = 1, steps = 6\n",
      "03:51:35 [INFO] train episode 4824: winner = 1, steps = 4\n",
      "03:51:38 [INFO] train episode 4825: winner = 1, steps = 6\n",
      "03:51:49 [INFO] train episode 4826: winner = 1, steps = 4\n",
      "03:51:58 [INFO] train episode 4827: winner = 0, steps = 8\n",
      "03:52:05 [INFO] train episode 4828: winner = 0, steps = 8\n",
      "03:52:08 [INFO] train episode 4829: winner = 1, steps = 4\n",
      "03:52:09 [INFO] train episode 4830: winner = 0, steps = 8\n",
      "03:52:14 [INFO] train episode 4831: winner = 1, steps = 6\n",
      "03:52:19 [INFO] train episode 4832: winner = 0, steps = 8\n",
      "03:52:23 [INFO] train episode 4833: winner = 1, steps = 6\n",
      "03:52:28 [INFO] train episode 4834: winner = 1, steps = 6\n",
      "03:52:37 [INFO] train episode 4835: winner = 0, steps = 8\n",
      "03:52:40 [INFO] train episode 4836: winner = -1, steps = 7\n",
      "03:52:46 [INFO] train episode 4837: winner = -1, steps = 7\n",
      "03:52:49 [INFO] train episode 4838: winner = 1, steps = 4\n",
      "03:52:51 [INFO] train episode 4839: winner = 1, steps = 4\n",
      "03:52:52 [INFO] train episode 4840: winner = 1, steps = 4\n",
      "03:52:57 [INFO] train episode 4841: winner = 1, steps = 6\n",
      "03:52:58 [INFO] train episode 4842: winner = 1, steps = 6\n",
      "03:53:04 [INFO] train episode 4843: winner = 0, steps = 8\n",
      "03:53:05 [INFO] train episode 4844: winner = -1, steps = 7\n",
      "03:53:09 [INFO] train episode 4845: winner = 1, steps = 6\n",
      "03:53:09 [INFO] train episode 4846: winner = -1, steps = 5\n",
      "03:53:10 [INFO] train episode 4847: winner = 1, steps = 4\n",
      "03:53:11 [INFO] train episode 4848: winner = 1, steps = 6\n",
      "03:53:12 [INFO] train episode 4849: winner = 0, steps = 8\n",
      "03:53:14 [INFO] train episode 4850: winner = 0, steps = 8\n",
      "03:53:30 [INFO] train episode 4851: winner = 0, steps = 8\n",
      "03:53:31 [INFO] train episode 4852: winner = 0, steps = 8\n",
      "03:53:31 [INFO] train episode 4853: winner = 1, steps = 6\n",
      "03:53:33 [INFO] train episode 4854: winner = 1, steps = 6\n",
      "03:53:38 [INFO] train episode 4855: winner = 0, steps = 8\n",
      "03:53:41 [INFO] train episode 4856: winner = 1, steps = 4\n",
      "03:53:44 [INFO] train episode 4857: winner = 1, steps = 4\n",
      "03:53:44 [INFO] train episode 4858: winner = 0, steps = 8\n",
      "03:53:44 [INFO] train episode 4859: winner = 1, steps = 6\n",
      "03:53:46 [INFO] train episode 4860: winner = 1, steps = 6\n",
      "03:53:47 [INFO] train episode 4861: winner = 1, steps = 4\n",
      "03:53:50 [INFO] train episode 4862: winner = 1, steps = 6\n",
      "03:53:54 [INFO] train episode 4863: winner = 0, steps = 8\n",
      "03:53:56 [INFO] train episode 4864: winner = 0, steps = 8\n",
      "03:53:56 [INFO] train episode 4865: winner = 1, steps = 6\n",
      "03:53:58 [INFO] train episode 4866: winner = 1, steps = 4\n",
      "03:54:03 [INFO] train episode 4867: winner = 1, steps = 8\n",
      "03:54:03 [INFO] train episode 4868: winner = 1, steps = 6\n",
      "03:54:09 [INFO] train episode 4869: winner = 0, steps = 8\n",
      "03:54:12 [INFO] train episode 4870: winner = 0, steps = 8\n",
      "03:54:13 [INFO] train episode 4871: winner = 1, steps = 6\n",
      "03:54:15 [INFO] train episode 4872: winner = -1, steps = 7\n",
      "03:54:16 [INFO] train episode 4873: winner = 1, steps = 6\n",
      "03:54:17 [INFO] train episode 4874: winner = 1, steps = 4\n",
      "03:54:19 [INFO] train episode 4875: winner = 1, steps = 6\n",
      "03:54:21 [INFO] train episode 4876: winner = 1, steps = 6\n",
      "03:54:22 [INFO] train episode 4877: winner = 0, steps = 8\n",
      "03:54:27 [INFO] train episode 4878: winner = 1, steps = 6\n",
      "03:54:33 [INFO] train episode 4879: winner = 1, steps = 8\n",
      "03:54:34 [INFO] train episode 4880: winner = 0, steps = 8\n",
      "03:54:35 [INFO] train episode 4881: winner = 1, steps = 6\n",
      "03:54:38 [INFO] train episode 4882: winner = 1, steps = 6\n",
      "03:54:39 [INFO] train episode 4883: winner = 0, steps = 8\n",
      "03:54:40 [INFO] train episode 4884: winner = 1, steps = 4\n",
      "03:54:41 [INFO] train episode 4885: winner = 0, steps = 8\n",
      "03:54:42 [INFO] train episode 4886: winner = 1, steps = 6\n",
      "03:54:42 [INFO] train episode 4887: winner = 1, steps = 6\n",
      "03:54:43 [INFO] train episode 4888: winner = 1, steps = 6\n",
      "03:54:46 [INFO] train episode 4889: winner = 0, steps = 8\n",
      "03:54:46 [INFO] train episode 4890: winner = 1, steps = 4\n",
      "03:54:49 [INFO] train episode 4891: winner = -1, steps = 7\n",
      "03:54:51 [INFO] train episode 4892: winner = 1, steps = 6\n",
      "03:54:51 [INFO] train episode 4893: winner = 1, steps = 6\n",
      "03:54:53 [INFO] train episode 4894: winner = 0, steps = 8\n",
      "03:54:54 [INFO] train episode 4895: winner = 1, steps = 6\n",
      "03:54:56 [INFO] train episode 4896: winner = 1, steps = 4\n",
      "03:54:56 [INFO] train episode 4897: winner = 0, steps = 8\n",
      "03:54:56 [INFO] train episode 4898: winner = 1, steps = 4\n",
      "03:54:57 [INFO] train episode 4899: winner = 1, steps = 6\n",
      "03:54:59 [INFO] train episode 4900: winner = 1, steps = 6\n",
      "03:54:59 [INFO] train episode 4901: winner = 1, steps = 6\n",
      "03:54:59 [INFO] train episode 4902: winner = 1, steps = 6\n",
      "03:55:01 [INFO] train episode 4903: winner = 1, steps = 4\n",
      "03:55:01 [INFO] train episode 4904: winner = 0, steps = 8\n",
      "03:55:02 [INFO] train episode 4905: winner = 0, steps = 8\n",
      "03:55:04 [INFO] train episode 4906: winner = 1, steps = 6\n",
      "03:55:04 [INFO] train episode 4907: winner = 1, steps = 6\n",
      "03:55:05 [INFO] train episode 4908: winner = 1, steps = 4\n",
      "03:55:08 [INFO] train episode 4909: winner = 0, steps = 8\n",
      "03:55:10 [INFO] train episode 4910: winner = 0, steps = 8\n",
      "03:55:11 [INFO] train episode 4911: winner = 0, steps = 8\n",
      "03:55:14 [INFO] train episode 4912: winner = -1, steps = 5\n",
      "03:55:16 [INFO] train episode 4913: winner = 1, steps = 6\n",
      "03:55:17 [INFO] train episode 4914: winner = 0, steps = 8\n",
      "03:55:19 [INFO] train episode 4915: winner = -1, steps = 5\n",
      "03:55:21 [INFO] train episode 4916: winner = 0, steps = 8\n",
      "03:55:24 [INFO] train episode 4917: winner = 1, steps = 8\n",
      "03:55:24 [INFO] train episode 4918: winner = -1, steps = 7\n",
      "03:55:25 [INFO] train episode 4919: winner = 0, steps = 8\n",
      "03:55:26 [INFO] train episode 4920: winner = 1, steps = 8\n",
      "03:55:29 [INFO] train episode 4921: winner = 1, steps = 8\n",
      "03:55:32 [INFO] train episode 4922: winner = 1, steps = 6\n",
      "03:55:33 [INFO] train episode 4923: winner = 1, steps = 4\n",
      "03:55:36 [INFO] train episode 4924: winner = 0, steps = 8\n",
      "03:55:37 [INFO] train episode 4925: winner = 1, steps = 6\n",
      "03:55:39 [INFO] train episode 4926: winner = 1, steps = 6\n",
      "03:55:41 [INFO] train episode 4927: winner = 1, steps = 6\n",
      "03:55:41 [INFO] train episode 4928: winner = 0, steps = 8\n",
      "03:55:42 [INFO] train episode 4929: winner = 1, steps = 4\n",
      "03:55:43 [INFO] train episode 4930: winner = 1, steps = 4\n",
      "03:55:44 [INFO] train episode 4931: winner = 0, steps = 8\n",
      "03:55:44 [INFO] train episode 4932: winner = 0, steps = 8\n",
      "03:55:45 [INFO] train episode 4933: winner = 1, steps = 6\n",
      "03:55:47 [INFO] train episode 4934: winner = 0, steps = 8\n",
      "03:55:49 [INFO] train episode 4935: winner = 0, steps = 8\n",
      "03:55:50 [INFO] train episode 4936: winner = 1, steps = 6\n",
      "03:55:50 [INFO] train episode 4937: winner = 1, steps = 6\n",
      "03:55:50 [INFO] train episode 4938: winner = 0, steps = 8\n",
      "03:55:51 [INFO] train episode 4939: winner = 0, steps = 8\n",
      "03:55:51 [INFO] train episode 4940: winner = 1, steps = 4\n",
      "03:55:53 [INFO] train episode 4941: winner = -1, steps = 5\n",
      "03:55:56 [INFO] train episode 4942: winner = 0, steps = 8\n",
      "03:55:57 [INFO] train episode 4943: winner = 1, steps = 6\n",
      "03:55:59 [INFO] train episode 4944: winner = 1, steps = 6\n",
      "03:55:59 [INFO] train episode 4945: winner = 1, steps = 6\n",
      "03:56:00 [INFO] train episode 4946: winner = 0, steps = 8\n",
      "03:56:02 [INFO] train episode 4947: winner = 1, steps = 6\n",
      "03:56:06 [INFO] train episode 4948: winner = 0, steps = 8\n",
      "03:56:06 [INFO] train episode 4949: winner = -1, steps = 7\n",
      "03:56:08 [INFO] train episode 4950: winner = 1, steps = 8\n",
      "03:56:08 [INFO] test episode 4950:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:56:17 [INFO] step 0：player 1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "++o\n",
      "03:56:24 [INFO] step 1：player -1, action (0, 0)\n",
      "x++\n",
      "+++\n",
      "++o\n",
      "03:56:27 [INFO] step 2：player 1, action (2, 0)\n",
      "x++\n",
      "+++\n",
      "o+o\n",
      "03:56:28 [INFO] step 3：player -1, action (2, 1)\n",
      "x++\n",
      "+++\n",
      "oxo\n",
      "03:56:28 [INFO] step 4：player 1, action (0, 2)\n",
      "x+o\n",
      "+++\n",
      "oxo\n",
      "03:56:28 [INFO] step 5：player -1, action (1, 2)\n",
      "x+o\n",
      "++x\n",
      "oxo\n",
      "03:56:29 [INFO] step 6：player 1, action (1, 0)\n",
      "x+o\n",
      "o+x\n",
      "oxo\n",
      "03:56:29 [INFO] step 7：player -1, action (1, 1)\n",
      "x+o\n",
      "oxx\n",
      "oxo\n",
      "03:56:29 [INFO] step 8：player 1, action (0, 1)\n",
      "xoo\n",
      "oxx\n",
      "oxo\n",
      "03:56:29 [INFO] test episode 4950: winner = 0, steps = 8\n",
      "03:56:34 [INFO] train episode 4951: winner = -1, steps = 5\n",
      "03:56:46 [INFO] train episode 4952: winner = 1, steps = 6\n",
      "03:56:56 [INFO] train episode 4953: winner = 0, steps = 8\n",
      "03:57:06 [INFO] train episode 4954: winner = 1, steps = 4\n",
      "03:57:25 [INFO] train episode 4955: winner = 1, steps = 8\n",
      "03:57:31 [INFO] train episode 4956: winner = 1, steps = 6\n",
      "03:57:42 [INFO] train episode 4957: winner = 0, steps = 8\n",
      "03:57:47 [INFO] train episode 4958: winner = 0, steps = 8\n",
      "03:57:51 [INFO] train episode 4959: winner = 0, steps = 8\n",
      "03:57:55 [INFO] train episode 4960: winner = 1, steps = 6\n",
      "03:57:59 [INFO] train episode 4961: winner = 1, steps = 6\n",
      "03:58:08 [INFO] train episode 4962: winner = 1, steps = 4\n",
      "03:58:16 [INFO] train episode 4963: winner = 0, steps = 8\n",
      "03:58:18 [INFO] train episode 4964: winner = 1, steps = 4\n",
      "03:58:19 [INFO] train episode 4965: winner = 1, steps = 4\n",
      "03:58:28 [INFO] train episode 4966: winner = 1, steps = 4\n",
      "03:58:37 [INFO] train episode 4967: winner = 1, steps = 4\n",
      "03:58:47 [INFO] train episode 4968: winner = 1, steps = 6\n",
      "03:58:50 [INFO] train episode 4969: winner = 1, steps = 6\n",
      "03:58:52 [INFO] train episode 4970: winner = 1, steps = 4\n",
      "03:58:53 [INFO] train episode 4971: winner = 1, steps = 4\n",
      "03:58:54 [INFO] train episode 4972: winner = 0, steps = 8\n",
      "03:58:58 [INFO] train episode 4973: winner = 1, steps = 6\n",
      "03:59:03 [INFO] train episode 4974: winner = 0, steps = 8\n",
      "03:59:08 [INFO] train episode 4975: winner = 0, steps = 8\n",
      "03:59:10 [INFO] train episode 4976: winner = 1, steps = 6\n",
      "03:59:13 [INFO] train episode 4977: winner = 1, steps = 4\n",
      "03:59:14 [INFO] train episode 4978: winner = 1, steps = 4\n",
      "03:59:20 [INFO] train episode 4979: winner = 1, steps = 6\n",
      "03:59:22 [INFO] train episode 4980: winner = 1, steps = 8\n",
      "03:59:25 [INFO] train episode 4981: winner = -1, steps = 5\n",
      "03:59:25 [INFO] train episode 4982: winner = 1, steps = 4\n",
      "03:59:27 [INFO] train episode 4983: winner = 0, steps = 8\n",
      "03:59:30 [INFO] train episode 4984: winner = -1, steps = 5\n",
      "03:59:35 [INFO] train episode 4985: winner = 0, steps = 8\n",
      "03:59:39 [INFO] train episode 4986: winner = 1, steps = 6\n",
      "03:59:40 [INFO] train episode 4987: winner = 0, steps = 8\n",
      "03:59:41 [INFO] train episode 4988: winner = 1, steps = 6\n",
      "03:59:48 [INFO] train episode 4989: winner = 0, steps = 8\n",
      "03:59:50 [INFO] train episode 4990: winner = 1, steps = 6\n",
      "03:59:53 [INFO] train episode 4991: winner = 0, steps = 8\n",
      "03:59:57 [INFO] train episode 4992: winner = 1, steps = 8\n",
      "04:00:02 [INFO] train episode 4993: winner = 0, steps = 8\n",
      "04:00:04 [INFO] train episode 4994: winner = 1, steps = 4\n",
      "04:00:04 [INFO] train episode 4995: winner = 0, steps = 8\n",
      "04:00:06 [INFO] train episode 4996: winner = 0, steps = 8\n",
      "04:00:07 [INFO] train episode 4997: winner = 1, steps = 6\n",
      "04:00:09 [INFO] train episode 4998: winner = -1, steps = 5\n",
      "04:00:10 [INFO] train episode 4999: winner = -1, steps = 7\n"
     ]
    }
   ],
   "source": [
    "def play_boardgame2_episode(env, agent, mode=None, verbose=False):\n",
    "    \"\"\"Play one board game episode and return its outcome.\n",
    "\n",
    "    Args:\n",
    "        env: environment whose `reset()` returns an observation and whose\n",
    "            `step(action)` returns `(observation, winner, done, info)`.\n",
    "        agent: object providing `reset(mode=...)`,\n",
    "            `step(observation, winner, done)` and `close()`.\n",
    "        mode: forwarded unchanged to `agent.reset`.\n",
    "        verbose: if true, print the board before every move and after the\n",
    "            final move, and log every chosen action.\n",
    "\n",
    "    Returns:\n",
    "        `(winner, elapsed_steps)`: the winner reported by the last\n",
    "        `env.step` call and the number of `env.step` calls that were made\n",
    "        (the terminal move is included in the count).\n",
    "    \"\"\"\n",
    "    observation, winner, done = env.reset(), 0, False\n",
    "    agent.reset(mode=mode)\n",
    "    elapsed_steps = 0\n",
    "    while not done:\n",
    "        if verbose:\n",
    "            board, player = observation\n",
    "            print(boardgame2.strfboard(board))\n",
    "        action = agent.step(observation, winner, done)\n",
    "        if verbose:\n",
    "            # elapsed_steps is still the zero-based index of this move here\n",
    "            logging.info('step %d：player %d, action %s', elapsed_steps, player,\n",
    "                    action)\n",
    "        observation, winner, done, _ = env.step(action)\n",
    "        # Count the move right after it is played so that the terminal move\n",
    "        # is part of the total (it used to be skipped by the early break).\n",
    "        elapsed_steps += 1\n",
    "    if verbose:\n",
    "        # show the final position reached by the last move\n",
    "        board, _ = observation\n",
    "        print(boardgame2.strfboard(board))\n",
    "    agent.close()\n",
    "    return winner, elapsed_steps\n",
    "\n",
    "\n",
    "for episode in range(5000):\n",
    "    # play one game in training mode and report its outcome\n",
    "    winner, elapsed_steps = play_boardgame2_episode(\n",
    "            env, agent, mode='train')\n",
    "    logging.info('train episode %d: winner = %d, steps = %d',\n",
    "            episode, winner, elapsed_steps)\n",
    "\n",
    "    # An empty replay memory is treated as the sign that the agent has just\n",
    "    # finished learning; in every other case go on to the next episode.\n",
    "    if len(agent.replayer.memory) != 0:\n",
    "        continue\n",
    "\n",
    "    # play one verbose game in test mode\n",
    "    logging.info('test episode %d:', episode)\n",
    "    winner, elapsed_steps = play_boardgame2_episode(\n",
    "            env, agent, verbose=True, mode='test')\n",
    "    logging.info('test episode %d: winner = %d, steps = %d', episode, winner,\n",
    "            elapsed_steps)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
